{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0059880239520957, "eval_steps": 500, "global_step": 670, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "logps_train/policy_1_2": -206.92308044433594, "logps_train/policy_1_l": -123.64761352539062, "logps_train/policy_1_w": -143.06700134277344, "logps_train/policy_2_2": -178.97097778320312, "logps_train/policy_2_w": -174.55642700195312, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 0.036987822502851486, "rewards_train/1-l": 0.016097292304039, "rewards_train/1-w": 0.00540950195863843, "rewards_train/2-2": 0.026339687407016754, "rewards_train/2-w": -0.009548693895339966, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": -0.010687790345400572, "rewards_train/margins_1": -0.03157832054421306, "rewards_train/margins_2": 0.03588838130235672, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -140.866455078125, "logps_train/policy_1_l": -184.57049560546875, "logps_train/policy_1_w": -166.25738525390625, "logps_train/policy_2_2": -123.08819580078125, "logps_train/policy_2_w": -186.00238037109375, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 0.0008539031259715557, "rewards_train/1-l": -0.004119357094168663, "rewards_train/1-w": -0.0029853745363652706, "rewards_train/2-2": 0.00299603259190917, "rewards_train/2-w": 0.007574762217700481, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.4375, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 0.0011339825578033924, "rewards_train/margins_1": -0.0038392776623368263, "rewards_train/margins_2": -0.004578729625791311, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -103.5050048828125, "logps_train/policy_1_l": -85.81730651855469, "logps_train/policy_1_w": -86.37541198730469, "logps_train/policy_2_2": -84.15333557128906, "logps_train/policy_2_w": -113.88278198242188, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": -0.0028442861512303352, "rewards_train/1-l": -0.007903080433607101, "rewards_train/1-w": -0.016838300973176956, "rewards_train/2-2": 0.01357260998338461, "rewards_train/2-w": 0.02226894535124302, "rewards_train/accuracies": 0.4375, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": -0.008935220539569855, "rewards_train/margins_1": -0.013994014821946621, "rewards_train/margins_2": -0.00869633536785841, "step": 0 }, { "epoch": 0, "logps_train/policy_1_2": -143.43370056152344, "logps_train/policy_1_l": -153.40306091308594, "logps_train/policy_1_w": -160.6190948486328, "logps_train/policy_2_2": -124.49246978759766, "logps_train/policy_2_w": -181.51519775390625, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -0.011143648996949196, "rewards_train/1-l": 0.016432570293545723, "rewards_train/1-w": -0.0017539500258862972, "rewards_train/2-2": 0.0013388809747993946, "rewards_train/2-w": -0.002300291322171688, "rewards_train/accuracies": 0.3125, "rewards_train/accuracies_1": 0.4375, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": -0.01818652031943202, "rewards_train/margins_1": 0.009389698971062899, "rewards_train/margins_2": 0.0036391722969710827, "step": 0 }, { "epoch": 0.0, "logps_train/policy_1_2": -181.58900451660156, "logps_train/policy_1_l": -189.27667236328125, "logps_train/policy_1_w": -188.43988037109375, "logps_train/policy_2_2": -162.74609375, "logps_train/policy_2_w": -213.621826171875, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -0.012806844897568226, "rewards_train/1-l": -0.009600384160876274, "rewards_train/1-w": 0.005522865802049637, "rewards_train/2-2": -0.0011708196252584457, "rewards_train/2-w": -0.013157702051103115, "rewards_train/accuracies": 0.3125, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 0.015123249962925911, "rewards_train/margins_1": 0.018329710699617863, "rewards_train/margins_2": 0.01198688242584467, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -130.58424377441406, "logps_train/policy_1_l": -114.07572937011719, "logps_train/policy_1_w": -136.81216430664062, "logps_train/policy_2_2": -114.08601379394531, "logps_train/policy_2_w": -161.74205017089844, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 0.0038801985792815685, "rewards_train/1-l": 0.01415607612580061, "rewards_train/1-w": -0.006608224473893642, "rewards_train/2-2": -0.014363247901201248, "rewards_train/2-w": -0.012095441110432148, "rewards_train/accuracies": 0.3125, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": -0.020764300599694252, "rewards_train/margins_1": -0.010488423053175211, "rewards_train/margins_2": -0.0022678067907691, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -149.79306030273438, "logps_train/policy_1_l": -132.7408905029297, "logps_train/policy_1_w": -117.1795654296875, "logps_train/policy_2_2": -122.94810485839844, "logps_train/policy_2_w": -147.82769775390625, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.02616269513964653, "rewards_train/1-l": -0.010807529091835022, "rewards_train/1-w": 0.019543347880244255, "rewards_train/2-2": 0.02589339017868042, "rewards_train/2-w": 0.01098058931529522, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.030350876972079277, "rewards_train/margins_1": -0.006619347259402275, "rewards_train/margins_2": 0.0149128008633852, "step": 1 }, { "epoch": 0.0, "logps_train/policy_1_2": -175.33883666992188, "logps_train/policy_1_l": -160.13720703125, "logps_train/policy_1_w": -140.31259155273438, "logps_train/policy_2_2": -160.16831970214844, "logps_train/policy_2_w": -157.37881469726562, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.020021401345729828, "rewards_train/1-l": -0.007567992899566889, "rewards_train/1-w": -0.0056729307398200035, "rewards_train/2-2": -0.005698496475815773, "rewards_train/2-w": -0.02694460190832615, "rewards_train/accuracies": 0.4375, "rewards_train/accuracies_1": 0.3125, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.0018950621597468853, "rewards_train/margins_1": -0.02569433208554983, "rewards_train/margins_2": 0.021246105432510376, "step": 1 }, { "epoch": 0.01, "learning_rate": 2.9411764705882356e-07, "loss": 2.773, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -217.56954956054688, "logps_train/policy_1_l": -133.3240966796875, "logps_train/policy_1_w": -142.56707763671875, "logps_train/policy_2_2": -192.74163818359375, "logps_train/policy_2_w": -158.75930786132812, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 0.03904004395008087, "rewards_train/1-l": 0.012513047084212303, "rewards_train/1-w": 0.0012021560687571764, "rewards_train/2-2": 0.007476043421775103, "rewards_train/2-w": -0.002884149784222245, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.3125, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": -0.011310891015455127, "rewards_train/margins_1": -0.037837887881323695, "rewards_train/margins_2": 0.010360193205997348, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -185.8866424560547, "logps_train/policy_1_l": -203.91860961914062, "logps_train/policy_1_w": -158.28924560546875, "logps_train/policy_2_2": -160.1568603515625, "logps_train/policy_2_w": -185.52435302734375, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.012884141877293587, "rewards_train/1-l": 0.010678333230316639, "rewards_train/1-w": -0.0058770496398210526, "rewards_train/2-2": 0.009705733507871628, "rewards_train/2-w": 0.019829880446195602, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": -0.01655538287013769, "rewards_train/margins_1": 0.007007092237472534, "rewards_train/margins_2": -0.010124146938323975, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -166.21771240234375, "logps_train/policy_1_l": -126.26789855957031, "logps_train/policy_1_w": -124.3115005493164, "logps_train/policy_2_2": -146.10716247558594, "logps_train/policy_2_w": -147.67529296875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.010443387553095818, "rewards_train/1-l": -0.012141602113842964, "rewards_train/1-w": 0.00527611467987299, "rewards_train/2-2": 0.00022089853882789612, "rewards_train/2-w": -0.009716255590319633, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 0.017417716793715954, "rewards_train/margins_1": 0.015719502232968807, "rewards_train/margins_2": 0.00993715412914753, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -191.11569213867188, "logps_train/policy_1_l": -198.74606323242188, "logps_train/policy_1_w": -186.28964233398438, "logps_train/policy_2_2": -165.49484252929688, "logps_train/policy_2_w": -214.99496459960938, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": 0.020463669672608376, "rewards_train/1-l": 0.04804975539445877, "rewards_train/1-w": 0.030997153371572495, "rewards_train/2-2": -0.0002647899091243744, "rewards_train/2-w": 0.03390149027109146, "rewards_train/accuracies": 0.4375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.3125, "rewards_train/margins": -0.017052602022886276, "rewards_train/margins_1": 0.010533483698964119, "rewards_train/margins_2": -0.034166280180215836, "step": 2 }, { "epoch": 0.01, "logps_train/policy_1_2": -221.32244873046875, "logps_train/policy_1_l": -153.23631286621094, "logps_train/policy_1_w": -136.14810180664062, "logps_train/policy_2_2": -196.19729614257812, "logps_train/policy_2_w": -154.0006866455078, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.004472827538847923, "rewards_train/1-l": -0.015231356024742126, "rewards_train/1-w": 0.023665620014071465, "rewards_train/2-2": -0.0326184518635273, "rewards_train/2-w": 0.007352648302912712, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.25, "rewards_train/margins": 0.03889697603881359, "rewards_train/margins_1": 0.01919279247522354, "rewards_train/margins_2": -0.03997110016644001, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -228.93414306640625, "logps_train/policy_1_l": -206.10147094726562, "logps_train/policy_1_w": -208.85382080078125, "logps_train/policy_2_2": -207.71554565429688, "logps_train/policy_2_w": -244.964599609375, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": 0.020258046686649323, "rewards_train/1-l": 0.0007892083376646042, "rewards_train/1-w": 0.010127037763595581, "rewards_train/2-2": 0.0276634581387043, "rewards_train/2-w": -0.005249408073723316, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.4375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.009337829425930977, "rewards_train/margins_1": -0.010131008923053741, "rewards_train/margins_2": 0.032912866212427616, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -167.77391052246094, "logps_train/policy_1_l": -122.63655853271484, "logps_train/policy_1_w": -131.43247985839844, "logps_train/policy_2_2": -143.3928680419922, "logps_train/policy_2_w": -156.84005737304688, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.015576601028442383, "rewards_train/1-l": -0.008968718349933624, "rewards_train/1-w": 0.022413453087210655, "rewards_train/2-2": -0.0345987007021904, "rewards_train/2-w": -0.003927016165107489, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.3125, "rewards_train/margins": 0.03138217143714428, "rewards_train/margins_1": 0.006836852058768272, "rewards_train/margins_2": -0.03067168453708291, "step": 3 }, { "epoch": 0.01, "logps_train/policy_1_2": -207.8677978515625, "logps_train/policy_1_l": -126.68730163574219, "logps_train/policy_1_w": -147.60842895507812, "logps_train/policy_2_2": -173.59671020507812, "logps_train/policy_2_w": -178.25592041015625, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.00228248443454504, "rewards_train/1-l": -0.007011094130575657, "rewards_train/1-w": -0.010304691269993782, "rewards_train/2-2": -0.0034212307073175907, "rewards_train/2-w": 0.0010680556297302246, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.4375, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": -0.003293597139418125, "rewards_train/margins_1": -0.012587175704538822, "rewards_train/margins_2": -0.004489286337047815, "step": 3 }, { "epoch": 0.01, "learning_rate": 5.882352941176471e-07, "loss": 2.773, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -195.36886596679688, "logps_train/policy_1_l": -157.45449829101562, "logps_train/policy_1_w": -145.62728881835938, "logps_train/policy_2_2": -163.81744384765625, "logps_train/policy_2_w": -174.41024780273438, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 0.004519093781709671, "rewards_train/1-l": 0.0012300480157136917, "rewards_train/1-w": 0.007583301980048418, "rewards_train/2-2": 0.007706975564360619, "rewards_train/2-w": 0.018936166539788246, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.006353253964334726, "rewards_train/margins_1": 0.003064208198338747, "rewards_train/margins_2": -0.011229190975427628, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -205.31504821777344, "logps_train/policy_1_l": -136.07232666015625, "logps_train/policy_1_w": -154.05215454101562, "logps_train/policy_2_2": -178.46763610839844, "logps_train/policy_2_w": -176.82818603515625, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": 0.01068292185664177, "rewards_train/1-l": -0.005913878325372934, "rewards_train/1-w": 0.006113886833190918, "rewards_train/2-2": 0.03868513181805611, "rewards_train/2-w": -0.015436051413416862, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.012027765158563852, "rewards_train/margins_1": -0.0045690350234508514, "rewards_train/margins_2": 0.05412118323147297, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -196.72679138183594, "logps_train/policy_1_l": -178.37020874023438, "logps_train/policy_1_w": -181.07223510742188, "logps_train/policy_2_2": -176.62295532226562, "logps_train/policy_2_w": -201.88461303710938, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -0.017991814762353897, "rewards_train/1-l": -0.033309437334537506, "rewards_train/1-w": 0.010744644328951836, "rewards_train/2-2": -0.0068259770050644875, "rewards_train/2-w": -0.00799180381000042, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 0.04405408166348934, "rewards_train/margins_1": 0.028736459091305733, "rewards_train/margins_2": 0.0011658268049359322, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -293.89605712890625, "logps_train/policy_1_l": -216.96261596679688, "logps_train/policy_1_w": -218.0688934326172, "logps_train/policy_2_2": -257.89788818359375, "logps_train/policy_2_w": -254.74111938476562, "logps_train/ref_1_2": -294.0, "logps_train/ref_1_l": -217.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -258.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": 0.016644131392240524, "rewards_train/1-l": -0.007589118089526892, "rewards_train/1-w": -0.025638772174715996, "rewards_train/2-2": -0.03510119765996933, "rewards_train/2-w": 0.01182642113417387, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.3125, "rewards_train/margins": -0.018049654085189104, "rewards_train/margins_1": -0.04228290356695652, "rewards_train/margins_2": -0.0469276187941432, "step": 4 }, { "epoch": 0.01, "logps_train/policy_1_2": -127.91740417480469, "logps_train/policy_1_l": -77.5691909790039, "logps_train/policy_1_w": -109.08956146240234, "logps_train/policy_2_2": -112.87318420410156, "logps_train/policy_2_w": -120.42083740234375, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 0.005914848763495684, "rewards_train/1-l": 0.009926008991897106, "rewards_train/1-w": 0.02034066803753376, "rewards_train/2-2": 0.009947370737791061, "rewards_train/2-w": 0.004009684547781944, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.010414659045636654, "rewards_train/margins_1": 0.014425819274038076, "rewards_train/margins_2": 0.005937686190009117, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -125.036376953125, "logps_train/policy_1_l": -143.98008728027344, "logps_train/policy_1_w": -76.5767822265625, "logps_train/policy_2_2": -106.53846740722656, "logps_train/policy_2_w": -89.88783264160156, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -76.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": -0.0069588348269462585, "rewards_train/1-l": -0.016319073736667633, "rewards_train/1-w": 0.018786698579788208, "rewards_train/2-2": -0.004041749052703381, "rewards_train/2-w": 0.00984901748597622, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.03510577231645584, "rewards_train/margins_1": 0.025745533406734467, "rewards_train/margins_2": -0.0138907665386796, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -194.3038330078125, "logps_train/policy_1_l": -138.6630859375, "logps_train/policy_1_w": -163.1993408203125, "logps_train/policy_2_2": -171.92251586914062, "logps_train/policy_2_w": -188.73898315429688, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": 0.035558801144361496, "rewards_train/1-l": 0.0015388056635856628, "rewards_train/1-w": 0.014368102885782719, "rewards_train/2-2": 0.016048748046159744, "rewards_train/2-w": 0.013699157163500786, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.3125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 0.012829297222197056, "rewards_train/margins_1": -0.021190698258578777, "rewards_train/margins_2": 0.0023495908826589584, "step": 5 }, { "epoch": 0.01, "logps_train/policy_1_2": -158.9042205810547, "logps_train/policy_1_l": -131.32846069335938, "logps_train/policy_1_w": -138.11251831054688, "logps_train/policy_2_2": -138.7289276123047, "logps_train/policy_2_w": -161.90760803222656, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.037264250218868256, "rewards_train/1-l": 0.00660697091370821, "rewards_train/1-w": 0.04182542487978935, "rewards_train/2-2": 0.03599400445818901, "rewards_train/2-w": 0.022910702973604202, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.03521845396608114, "rewards_train/margins_1": 0.004561174660921097, "rewards_train/margins_2": 0.013083301484584808, "step": 5 }, { "epoch": 0.02, "learning_rate": 8.823529411764707e-07, "loss": 2.7556, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -177.57357788085938, "logps_train/policy_1_l": -132.2242431640625, "logps_train/policy_1_w": -145.00375366210938, "logps_train/policy_2_2": -156.66192626953125, "logps_train/policy_2_w": -169.57278442382812, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.024283356964588165, "rewards_train/1-l": 0.01300128921866417, "rewards_train/1-w": 0.03790505975484848, "rewards_train/2-2": 0.008074769750237465, "rewards_train/2-w": 0.01967434585094452, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.02490377053618431, "rewards_train/margins_1": 0.013621702790260315, "rewards_train/margins_2": -0.011599576100707054, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -115.2109375, "logps_train/policy_1_l": -79.9293212890625, "logps_train/policy_1_w": -92.6666259765625, "logps_train/policy_2_2": -90.95626831054688, "logps_train/policy_2_w": -106.32064819335938, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 0.00537017872557044, "rewards_train/1-l": 0.01077932957559824, "rewards_train/1-w": 0.0011107204481959343, "rewards_train/2-2": 0.002469309838488698, "rewards_train/2-w": 0.011294269934296608, "rewards_train/accuracies": 0.5, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": -0.009668609127402306, "rewards_train/margins_1": -0.004259458277374506, "rewards_train/margins_2": -0.00882496009580791, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -145.27964782714844, "logps_train/policy_1_l": -162.06942749023438, "logps_train/policy_1_w": -146.14695739746094, "logps_train/policy_2_2": -124.16050720214844, "logps_train/policy_2_w": -171.76065063476562, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.018199311569333076, "rewards_train/1-l": 0.0041175466030836105, "rewards_train/1-w": 0.032960258424282074, "rewards_train/2-2": -0.013511979021131992, "rewards_train/2-w": 0.050302810966968536, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.0625, "rewards_train/margins": 0.028842711821198463, "rewards_train/margins_1": 0.05115956999361515, "rewards_train/margins_2": -0.06381478998810053, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -246.12103271484375, "logps_train/policy_1_l": -153.4169921875, "logps_train/policy_1_w": -158.6375274658203, "logps_train/policy_2_2": -216.6551513671875, "logps_train/policy_2_w": -177.5287628173828, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.019145112484693527, "rewards_train/1-l": 0.015527607873082161, "rewards_train/1-w": 0.04156932234764099, "rewards_train/2-2": 0.05831431970000267, "rewards_train/2-w": 0.015287458896636963, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 0.02604171447455883, "rewards_train/margins_1": 0.022424209862947464, "rewards_train/margins_2": 0.04302686080336571, "step": 6 }, { "epoch": 0.02, "logps_train/policy_1_2": -154.94711303710938, "logps_train/policy_1_l": -183.77301025390625, "logps_train/policy_1_w": -212.72943115234375, "logps_train/policy_2_2": -134.86480712890625, "logps_train/policy_2_w": -242.79090881347656, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": 0.030971966683864594, "rewards_train/1-l": 0.012150421738624573, "rewards_train/1-w": 0.04150954633951187, "rewards_train/2-2": 0.0400390550494194, "rewards_train/2-w": 0.058797888457775116, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 0.0293591246008873, "rewards_train/margins_1": 0.010537579655647278, "rewards_train/margins_2": -0.018758833408355713, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -198.96221923828125, "logps_train/policy_1_l": -196.141357421875, "logps_train/policy_1_w": -189.1175537109375, "logps_train/policy_2_2": -167.00091552734375, "logps_train/policy_2_w": -221.66448974609375, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": 0.0701853558421135, "rewards_train/1-l": -0.0010489299893379211, "rewards_train/1-w": 0.028087865561246872, "rewards_train/2-2": 0.0694393515586853, "rewards_train/2-w": 0.05386331304907799, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.3125, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.029136795550584793, "rewards_train/margins_1": -0.04209749028086662, "rewards_train/margins_2": 0.015576038509607315, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -212.93087768554688, "logps_train/policy_1_l": -152.8028106689453, "logps_train/policy_1_w": -189.01724243164062, "logps_train/policy_2_2": -183.44415283203125, "logps_train/policy_2_w": -217.46878051757812, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 0.030350446701049805, "rewards_train/1-l": 0.0064373379573225975, "rewards_train/1-w": 0.051399897783994675, "rewards_train/2-2": 0.0477726012468338, "rewards_train/2-w": 0.05976332351565361, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.04496255982667208, "rewards_train/margins_1": 0.02104945108294487, "rewards_train/margins_2": -0.011990722268819809, "step": 7 }, { "epoch": 0.02, "logps_train/policy_1_2": -228.69686889648438, "logps_train/policy_1_l": -179.7490692138672, "logps_train/policy_1_w": -180.7938690185547, "logps_train/policy_2_2": -200.89344787597656, "logps_train/policy_2_w": -209.7539825439453, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": 0.05062472075223923, "rewards_train/1-l": 0.0035111159086227417, "rewards_train/1-w": 0.024714704602956772, "rewards_train/2-2": 0.09581109881401062, "rewards_train/2-w": 0.014446472749114037, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 0.02120358869433403, "rewards_train/margins_1": -0.025910016149282455, "rewards_train/margins_2": 0.08136462606489658, "step": 7 }, { "epoch": 0.02, "learning_rate": 1.1764705882352942e-06, "loss": 2.7288, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -115.73379516601562, "logps_train/policy_1_l": -80.82598876953125, "logps_train/policy_1_w": -100.07675170898438, "logps_train/policy_2_2": -101.2200698852539, "logps_train/policy_2_w": -120.53211212158203, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": 0.028183095157146454, "rewards_train/1-l": 0.011150926351547241, "rewards_train/1-w": 0.032949548214673996, "rewards_train/2-2": 0.0506492555141449, "rewards_train/2-w": 0.01553869154304266, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 0.021798621863126755, "rewards_train/margins_1": 0.004766453057527542, "rewards_train/margins_2": 0.03511056397110224, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -197.45643615722656, "logps_train/policy_1_l": -148.34783935546875, "logps_train/policy_1_w": -178.07864379882812, "logps_train/policy_2_2": -173.0404052734375, "logps_train/policy_2_w": -203.46096801757812, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": 0.07779403775930405, "rewards_train/1-l": 0.04441613331437111, "rewards_train/1-w": 0.10463526844978333, "rewards_train/2-2": 0.10904428362846375, "rewards_train/2-w": 0.05546526610851288, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.060219135135412216, "rewards_train/margins_1": 0.02684123069047928, "rewards_train/margins_2": 0.05357901751995087, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -148.08786010742188, "logps_train/policy_1_l": -107.22083282470703, "logps_train/policy_1_w": -145.36483764648438, "logps_train/policy_2_2": -126.78526306152344, "logps_train/policy_2_w": -170.40573120117188, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -107.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": 0.05097971856594086, "rewards_train/1-l": 0.035045601427555084, "rewards_train/1-w": 0.07650444656610489, "rewards_train/2-2": 0.06717702746391296, "rewards_train/2-w": 0.06206369400024414, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.041458845138549805, "rewards_train/margins_1": 0.025524728000164032, "rewards_train/margins_2": 0.005113333463668823, "step": 8 }, { "epoch": 0.02, "logps_train/policy_1_2": -162.4486541748047, "logps_train/policy_1_l": -166.48367309570312, "logps_train/policy_1_w": -132.70101928710938, "logps_train/policy_2_2": -136.06033325195312, "logps_train/policy_2_w": -153.7462615966797, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.001896618865430355, "rewards_train/1-l": 0.026926327496767044, "rewards_train/1-w": 0.042202193289995193, "rewards_train/2-2": 0.05998188257217407, "rewards_train/2-w": 0.051937103271484375, "rewards_train/accuracies": 0.4375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 0.01527586579322815, "rewards_train/margins_1": 0.04409881215542555, "rewards_train/margins_2": 0.008044779300689697, "step": 8 }, { "epoch": 0.03, "logps_train/policy_1_2": -185.7164306640625, "logps_train/policy_1_l": -138.24658203125, "logps_train/policy_1_w": -173.66470336914062, "logps_train/policy_2_2": -159.9189910888672, "logps_train/policy_2_w": -195.73085021972656, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 0.09613040834665298, "rewards_train/1-l": 0.03398395702242851, "rewards_train/1-w": 0.11165380477905273, "rewards_train/2-2": 0.11523036658763885, "rewards_train/2-w": 0.08941490948200226, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.07766984775662422, "rewards_train/margins_1": 0.01552339643239975, "rewards_train/margins_2": 0.025815457105636597, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -270.426513671875, "logps_train/policy_1_l": -177.56228637695312, "logps_train/policy_1_w": -186.03341674804688, "logps_train/policy_2_2": -241.08541870117188, "logps_train/policy_2_w": -217.00242614746094, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -243.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": 0.18332624435424805, "rewards_train/1-l": 0.036350078880786896, "rewards_train/1-w": 0.1412871778011322, "rewards_train/2-2": 0.1880396455526352, "rewards_train/2-w": 0.07280363142490387, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.4375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 0.1049370989203453, "rewards_train/margins_1": -0.042039066553115845, "rewards_train/margins_2": 0.11523601412773132, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -171.03726196289062, "logps_train/policy_1_l": -88.32797241210938, "logps_train/policy_1_w": -105.43644714355469, "logps_train/policy_2_2": -146.68141174316406, "logps_train/policy_2_w": -123.7166748046875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": 0.09158627688884735, "rewards_train/1-l": 0.024673476815223694, "rewards_train/1-w": 0.07158984988927841, "rewards_train/2-2": 0.1326400488615036, "rewards_train/2-w": 0.07598898559808731, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 0.04691637307405472, "rewards_train/margins_1": -0.01999642699956894, "rewards_train/margins_2": 0.05665106326341629, "step": 9 }, { "epoch": 0.03, "logps_train/policy_1_2": -133.2315216064453, "logps_train/policy_1_l": -126.39393615722656, "logps_train/policy_1_w": -105.28231048583984, "logps_train/policy_2_2": -112.42765808105469, "logps_train/policy_2_w": -127.82316589355469, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": 0.053996432572603226, "rewards_train/1-l": 0.008507068268954754, "rewards_train/1-w": 0.06073366105556488, "rewards_train/2-2": 0.06397297978401184, "rewards_train/2-w": 0.050886835902929306, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.052226592786610126, "rewards_train/margins_1": 0.006737228482961655, "rewards_train/margins_2": 0.013086143881082535, "step": 9 }, { "epoch": 0.03, "learning_rate": 1.4705882352941177e-06, "loss": 2.6563, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -205.59677124023438, "logps_train/policy_1_l": -161.2711944580078, "logps_train/policy_1_w": -191.57229614257812, "logps_train/policy_2_2": -174.95782470703125, "logps_train/policy_2_w": -221.24452209472656, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": 0.14891812205314636, "rewards_train/1-l": 0.030303336679935455, "rewards_train/1-w": 0.20605286955833435, "rewards_train/2-2": 0.2010919451713562, "rewards_train/2-w": 0.1427355408668518, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 0.1757495328783989, "rewards_train/margins_1": 0.05713474750518799, "rewards_train/margins_2": 0.058356404304504395, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -87.02449035644531, "logps_train/policy_1_l": -84.72859191894531, "logps_train/policy_1_w": -90.03756713867188, "logps_train/policy_2_2": -70.571044921875, "logps_train/policy_2_w": -117.09343719482422, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -71.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": 0.016105908900499344, "rewards_train/1-l": 0.02567652054131031, "rewards_train/1-w": 0.11030565202236176, "rewards_train/2-2": 0.03215338662266731, "rewards_train/2-w": 0.03596891090273857, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.08462913148105145, "rewards_train/margins_1": 0.09419974312186241, "rewards_train/margins_2": -0.0038155242800712585, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -196.40296936035156, "logps_train/policy_1_l": -199.19427490234375, "logps_train/policy_1_w": -179.3260498046875, "logps_train/policy_2_2": -172.65182495117188, "logps_train/policy_2_w": -209.65145874023438, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 0.12142082303762436, "rewards_train/1-l": 0.0561581552028656, "rewards_train/1-w": 0.18653514981269836, "rewards_train/2-2": 0.13696491718292236, "rewards_train/2-w": 0.11727554351091385, "rewards_train/accuracies": 0.5625, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 0.13037699460983276, "rewards_train/margins_1": 0.065114326775074, "rewards_train/margins_2": 0.019689373672008514, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -150.12266540527344, "logps_train/policy_1_l": -134.14999389648438, "logps_train/policy_1_w": -150.79884338378906, "logps_train/policy_2_2": -133.9109344482422, "logps_train/policy_2_w": -168.35696411132812, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.08109284937381744, "rewards_train/1-l": 0.03485388308763504, "rewards_train/1-w": 0.14426065981388092, "rewards_train/2-2": 0.11671925336122513, "rewards_train/2-w": 0.1025862991809845, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.10940677672624588, "rewards_train/margins_1": 0.06316781044006348, "rewards_train/margins_2": 0.014132954180240631, "step": 10 }, { "epoch": 0.03, "logps_train/policy_1_2": -127.73130798339844, "logps_train/policy_1_l": -124.2657470703125, "logps_train/policy_1_w": -197.6813201904297, "logps_train/policy_2_2": -108.32481384277344, "logps_train/policy_2_w": -230.15286254882812, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 0.06436820328235626, "rewards_train/1-l": 0.03006596677005291, "rewards_train/1-w": 0.1993473619222641, "rewards_train/2-2": 0.10267497599124908, "rewards_train/2-w": 0.16498786211013794, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.3125, "rewards_train/margins": 0.1692813951522112, "rewards_train/margins_1": 0.13497915863990784, "rewards_train/margins_2": -0.062312886118888855, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -221.43780517578125, "logps_train/policy_1_l": -170.9988250732422, "logps_train/policy_1_w": -150.30120849609375, "logps_train/policy_2_2": -199.10693359375, "logps_train/policy_2_w": -170.4422607421875, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": 0.2573900520801544, "rewards_train/1-l": 0.052949562668800354, "rewards_train/1-w": 0.18101289868354797, "rewards_train/2-2": 0.2759278416633606, "rewards_train/2-w": 0.10792332887649536, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 0.12806333601474762, "rewards_train/margins_1": -0.07637715339660645, "rewards_train/margins_2": 0.16800451278686523, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -170.09323120117188, "logps_train/policy_1_l": -162.036376953125, "logps_train/policy_1_w": -168.57183837890625, "logps_train/policy_2_2": -150.46151733398438, "logps_train/policy_2_w": -190.49441528320312, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.1307172179222107, "rewards_train/1-l": 0.08268893510103226, "rewards_train/1-w": 0.21527786552906036, "rewards_train/2-2": 0.14134816825389862, "rewards_train/2-w": 0.16618502140045166, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.1325889304280281, "rewards_train/margins_1": 0.08456064760684967, "rewards_train/margins_2": -0.02483685314655304, "step": 11 }, { "epoch": 0.03, "logps_train/policy_1_2": -88.3341064453125, "logps_train/policy_1_l": -108.40958404541016, "logps_train/policy_1_w": -89.92201232910156, "logps_train/policy_2_2": -76.48867797851562, "logps_train/policy_2_w": -105.95520782470703, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -77.5, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": 0.06627162545919418, "rewards_train/1-l": 0.03560428321361542, "rewards_train/1-w": 0.10447852313518524, "rewards_train/2-2": 0.10295066237449646, "rewards_train/2-w": 0.06463562697172165, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.375, "rewards_train/margins": 0.06887423992156982, "rewards_train/margins_1": 0.03820689767599106, "rewards_train/margins_2": 0.03831503540277481, "step": 11 }, { "epoch": 0.04, "learning_rate": 1.7647058823529414e-06, "loss": 2.5714, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -231.89874267578125, "logps_train/policy_1_l": -143.920166015625, "logps_train/policy_1_w": -159.78640747070312, "logps_train/policy_2_2": -200.0634002685547, "logps_train/policy_2_w": -180.48483276367188, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -205.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.341619610786438, "rewards_train/1-l": 0.08386167883872986, "rewards_train/1-w": 0.40876150131225586, "rewards_train/2-2": 0.4953199028968811, "rewards_train/2-w": 0.3405787944793701, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 0.324899822473526, "rewards_train/margins_1": 0.06714189052581787, "rewards_train/margins_2": 0.154741108417511, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -168.62924194335938, "logps_train/policy_1_l": -126.26393127441406, "logps_train/policy_1_w": -141.8515167236328, "logps_train/policy_2_2": -146.25357055664062, "logps_train/policy_2_w": -164.4496307373047, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 0.25113800168037415, "rewards_train/1-l": 0.07770836353302002, "rewards_train/1-w": 0.35078608989715576, "rewards_train/2-2": 0.35745543241500854, "rewards_train/2-w": 0.28081777691841125, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.27307772636413574, "rewards_train/margins_1": 0.09964808821678162, "rewards_train/margins_2": 0.07663765549659729, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -183.5927276611328, "logps_train/policy_1_l": -124.70578002929688, "logps_train/policy_1_w": -124.76033020019531, "logps_train/policy_2_2": -157.92626953125, "logps_train/policy_2_w": -144.54666137695312, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.28174304962158203, "rewards_train/1-l": 0.05910956859588623, "rewards_train/1-w": 0.23043662309646606, "rewards_train/2-2": 0.37026453018188477, "rewards_train/2-w": 0.16252095997333527, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 0.17132705450057983, "rewards_train/margins_1": -0.05130642652511597, "rewards_train/margins_2": 0.2077435702085495, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -186.33859252929688, "logps_train/policy_1_l": -173.48294067382812, "logps_train/policy_1_w": -162.09738159179688, "logps_train/policy_2_2": -154.35104370117188, "logps_train/policy_2_w": -196.128173828125, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.1794210970401764, "rewards_train/1-l": 0.10619853436946869, "rewards_train/1-w": 0.3168238401412964, "rewards_train/2-2": 0.30942654609680176, "rewards_train/2-w": 0.20710498094558716, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.2106253057718277, "rewards_train/margins_1": 0.13740274310112, "rewards_train/margins_2": 0.1023215651512146, "step": 12 }, { "epoch": 0.04, "logps_train/policy_1_2": -122.49327850341797, "logps_train/policy_1_l": -135.97007751464844, "logps_train/policy_1_w": -125.45499420166016, "logps_train/policy_2_2": -105.18260192871094, "logps_train/policy_2_w": -144.74215698242188, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.21707844734191895, "rewards_train/1-l": 0.09437482804059982, "rewards_train/1-w": 0.4019624590873718, "rewards_train/2-2": 0.2950207591056824, "rewards_train/2-w": 0.29531610012054443, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.307587631046772, "rewards_train/margins_1": 0.18488401174545288, "rewards_train/margins_2": -0.00029534101486206055, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -140.85015869140625, "logps_train/policy_1_l": -112.3922119140625, "logps_train/policy_1_w": -150.28219604492188, "logps_train/policy_2_2": -123.27210998535156, "logps_train/policy_2_w": -173.85848999023438, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.22201578319072723, "rewards_train/1-l": 0.035779520869255066, "rewards_train/1-w": 0.469435453414917, "rewards_train/2-2": 0.2792346477508545, "rewards_train/2-w": 0.35673004388809204, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.4336559325456619, "rewards_train/margins_1": 0.24741967022418976, "rewards_train/margins_2": -0.07749539613723755, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -151.60231018066406, "logps_train/policy_1_l": -139.839599609375, "logps_train/policy_1_w": -93.87045288085938, "logps_train/policy_2_2": -129.6639862060547, "logps_train/policy_2_w": -109.81555938720703, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": 0.23371411859989166, "rewards_train/1-l": 0.09353040158748627, "rewards_train/1-w": 0.2309236228466034, "rewards_train/2-2": 0.3147045075893402, "rewards_train/2-w": 0.1804555058479309, "rewards_train/accuracies": 0.625, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.13739322125911713, "rewards_train/margins_1": -0.002790495753288269, "rewards_train/margins_2": 0.1342490017414093, "step": 13 }, { "epoch": 0.04, "logps_train/policy_1_2": -131.57220458984375, "logps_train/policy_1_l": -85.50208282470703, "logps_train/policy_1_w": -102.77734375, "logps_train/policy_2_2": -108.42292785644531, "logps_train/policy_2_w": -127.98008728027344, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": 0.22793477773666382, "rewards_train/1-l": 0.01854179985821247, "rewards_train/1-w": 0.2742188572883606, "rewards_train/2-2": 0.332707941532135, "rewards_train/2-w": 0.17503845691680908, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 0.2556770574301481, "rewards_train/margins_1": 0.04628407955169678, "rewards_train/margins_2": 0.15766948461532593, "step": 13 }, { "epoch": 0.04, "learning_rate": 2.058823529411765e-06, "loss": 2.3808, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -149.05189514160156, "logps_train/policy_1_l": -159.74192810058594, "logps_train/policy_1_w": -166.26947021484375, "logps_train/policy_2_2": -132.91519165039062, "logps_train/policy_2_w": -186.8763427734375, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": 0.22820881009101868, "rewards_train/1-l": 0.10495686531066895, "rewards_train/1-w": 0.5242239236831665, "rewards_train/2-2": 0.327034592628479, "rewards_train/2-w": 0.4283812940120697, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 0.41926705837249756, "rewards_train/margins_1": 0.2960151135921478, "rewards_train/margins_2": -0.1013467013835907, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -150.2283477783203, "logps_train/policy_1_l": -155.64566040039062, "logps_train/policy_1_w": -146.92123413085938, "logps_train/policy_2_2": -126.35533905029297, "logps_train/policy_2_w": -172.11419677734375, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 0.23888374865055084, "rewards_train/1-l": 0.10264641791582108, "rewards_train/1-w": 0.3735019564628601, "rewards_train/2-2": 0.3849739730358124, "rewards_train/2-w": 0.24092504382133484, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.27085553854703903, "rewards_train/margins_1": 0.13461820781230927, "rewards_train/margins_2": 0.14404892921447754, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -138.91030883789062, "logps_train/policy_1_l": -99.95610046386719, "logps_train/policy_1_w": -104.7943344116211, "logps_train/policy_2_2": -120.85820007324219, "logps_train/policy_2_w": -119.46273040771484, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": 0.31189990043640137, "rewards_train/1-l": 0.06151924654841423, "rewards_train/1-w": 0.23111341893672943, "rewards_train/2-2": 0.4048052430152893, "rewards_train/2-w": 0.15919539332389832, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 0.1695941723883152, "rewards_train/margins_1": -0.08078648149967194, "rewards_train/margins_2": 0.245609849691391, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -146.833251953125, "logps_train/policy_1_l": -80.91234588623047, "logps_train/policy_1_w": -98.30854034423828, "logps_train/policy_2_2": -125.26580810546875, "logps_train/policy_2_w": -117.947509765625, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": 0.23698683083057404, "rewards_train/1-l": 0.0393807515501976, "rewards_train/1-w": 0.29609936475753784, "rewards_train/2-2": 0.36052900552749634, "rewards_train/2-w": 0.22185057401657104, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.4375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.25671861320734024, "rewards_train/margins_1": 0.059112533926963806, "rewards_train/margins_2": 0.1386784315109253, "step": 14 }, { "epoch": 0.04, "logps_train/policy_1_2": -129.44873046875, "logps_train/policy_1_l": -110.41493225097656, "logps_train/policy_1_w": -129.31539916992188, "logps_train/policy_2_2": -104.10504913330078, "logps_train/policy_2_w": -149.5266876220703, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": 0.12504863739013672, "rewards_train/1-l": 0.05876515060663223, "rewards_train/1-w": 0.47080349922180176, "rewards_train/2-2": 0.2355889528989792, "rewards_train/2-w": 0.3426428437232971, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 0.4120383486151695, "rewards_train/margins_1": 0.34575486183166504, "rewards_train/margins_2": -0.10705389082431793, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -170.2947998046875, "logps_train/policy_1_l": -189.98480224609375, "logps_train/policy_1_w": -170.3165283203125, "logps_train/policy_2_2": -144.12612915039062, "logps_train/policy_2_w": -200.84783935546875, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 0.3670039772987366, "rewards_train/1-l": 0.16113752126693726, "rewards_train/1-w": 0.724596381187439, "rewards_train/2-2": 0.5463714599609375, "rewards_train/2-w": 0.5222466588020325, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.5634588599205017, "rewards_train/margins_1": 0.3575924038887024, "rewards_train/margins_2": 0.02412480115890503, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -118.49765014648438, "logps_train/policy_1_l": -114.10871887207031, "logps_train/policy_1_w": -116.33987426757812, "logps_train/policy_2_2": -105.04020690917969, "logps_train/policy_2_w": -139.48828125, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.2988675832748413, "rewards_train/1-l": 0.04196079820394516, "rewards_train/1-w": 0.2983369827270508, "rewards_train/2-2": 0.3461751937866211, "rewards_train/2-w": 0.1396496742963791, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 0.2563761845231056, "rewards_train/margins_1": -0.0005306005477905273, "rewards_train/margins_2": 0.206525519490242, "step": 15 }, { "epoch": 0.04, "logps_train/policy_1_2": -150.18162536621094, "logps_train/policy_1_l": -128.48489379882812, "logps_train/policy_1_w": -146.36077880859375, "logps_train/policy_2_2": -122.75877380371094, "logps_train/policy_2_w": -179.2103271484375, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 0.25722867250442505, "rewards_train/1-l": 0.08432238548994064, "rewards_train/1-w": 0.3029850423336029, "rewards_train/2-2": 0.4364272952079773, "rewards_train/2-w": 0.16216963529586792, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 0.21866265684366226, "rewards_train/margins_1": 0.045756369829177856, "rewards_train/margins_2": 0.2742576599121094, "step": 15 }, { "epoch": 0.05, "learning_rate": 2.3529411764705885e-06, "loss": 2.3405, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -147.47177124023438, "logps_train/policy_1_l": -123.18833923339844, "logps_train/policy_1_w": -146.80714416503906, "logps_train/policy_2_2": -120.88013458251953, "logps_train/policy_2_w": -174.3365478515625, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 0.1803862452507019, "rewards_train/1-l": 0.0533582977950573, "rewards_train/1-w": 0.698704183101654, "rewards_train/2-2": 0.3008291721343994, "rewards_train/2-w": 0.444176584482193, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.6453458853065968, "rewards_train/margins_1": 0.5183179378509521, "rewards_train/margins_2": -0.14334741234779358, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -149.87612915039062, "logps_train/policy_1_l": -150.56626892089844, "logps_train/policy_1_w": -161.04844665527344, "logps_train/policy_2_2": -125.95801544189453, "logps_train/policy_2_w": -192.70114135742188, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.21551206707954407, "rewards_train/1-l": 0.03077523782849312, "rewards_train/1-w": 0.8461315631866455, "rewards_train/2-2": 0.43447208404541016, "rewards_train/2-w": 0.5853544473648071, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 0.8153563253581524, "rewards_train/margins_1": 0.6306194961071014, "rewards_train/margins_2": -0.15088236331939697, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -181.7067413330078, "logps_train/policy_1_l": -200.4961395263672, "logps_train/policy_1_w": -148.79580688476562, "logps_train/policy_2_2": -155.80563354492188, "logps_train/policy_2_w": -170.17672729492188, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.5418262481689453, "rewards_train/1-l": 0.20136353373527527, "rewards_train/1-w": 0.6899511814117432, "rewards_train/2-2": 0.6655310392379761, "rewards_train/2-w": 0.5299841165542603, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 0.4885876476764679, "rewards_train/margins_1": 0.14812493324279785, "rewards_train/margins_2": 0.13554692268371582, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -144.69903564453125, "logps_train/policy_1_l": -125.51148223876953, "logps_train/policy_1_w": -142.7015380859375, "logps_train/policy_2_2": -120.93391418457031, "logps_train/policy_2_w": -170.89846801757812, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.35587793588638306, "rewards_train/1-l": 0.14484810829162598, "rewards_train/1-w": 0.6376574039459229, "rewards_train/2-2": 0.4950851798057556, "rewards_train/2-w": 0.4624961018562317, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.4928092956542969, "rewards_train/margins_1": 0.2817794680595398, "rewards_train/margins_2": 0.032589077949523926, "step": 16 }, { "epoch": 0.05, "logps_train/policy_1_2": -191.72073364257812, "logps_train/policy_1_l": -176.81326293945312, "logps_train/policy_1_w": -154.38104248046875, "logps_train/policy_2_2": -161.2462615966797, "logps_train/policy_2_w": -179.92816162109375, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": 0.5537075996398926, "rewards_train/1-l": 0.029514247551560402, "rewards_train/1-w": 0.8972479701042175, "rewards_train/2-2": 0.948810875415802, "rewards_train/2-w": 0.6458554863929749, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.8677337225526571, "rewards_train/margins_1": 0.34354037046432495, "rewards_train/margins_2": 0.30295538902282715, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -179.20449829101562, "logps_train/policy_1_l": -188.78988647460938, "logps_train/policy_1_w": -122.87844848632812, "logps_train/policy_2_2": -155.54348754882812, "logps_train/policy_2_w": -141.5835723876953, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.3022055923938751, "rewards_train/1-l": 0.10328607261180878, "rewards_train/1-w": 0.6531704664230347, "rewards_train/2-2": 0.5140091776847839, "rewards_train/2-w": 0.43656450510025024, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.5498843938112259, "rewards_train/margins_1": 0.35096487402915955, "rewards_train/margins_2": 0.07744467258453369, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -212.6986541748047, "logps_train/policy_1_l": -207.909423828125, "logps_train/policy_1_w": -161.18228149414062, "logps_train/policy_2_2": -177.51510620117188, "logps_train/policy_2_w": -202.697998046875, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": 0.8512275218963623, "rewards_train/1-l": 0.09372672438621521, "rewards_train/1-w": 0.9372413158416748, "rewards_train/2-2": 1.2395045757293701, "rewards_train/2-w": 0.552074134349823, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.8435145914554596, "rewards_train/margins_1": 0.0860137939453125, "rewards_train/margins_2": 0.6874304413795471, "step": 17 }, { "epoch": 0.05, "logps_train/policy_1_2": -177.32876586914062, "logps_train/policy_1_l": -162.58737182617188, "logps_train/policy_1_w": -131.7937774658203, "logps_train/policy_2_2": -147.93893432617188, "logps_train/policy_2_w": -170.04930114746094, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": 0.7179524898529053, "rewards_train/1-l": -0.03908387944102287, "rewards_train/1-w": 0.8143718838691711, "rewards_train/2-2": 0.9510772824287415, "rewards_train/2-w": 0.4294452667236328, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.853455763310194, "rewards_train/margins_1": 0.09641939401626587, "rewards_train/margins_2": 0.5216320157051086, "step": 17 }, { "epoch": 0.05, "learning_rate": 2.647058823529412e-06, "loss": 2.0918, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -161.63265991210938, "logps_train/policy_1_l": -123.56356048583984, "logps_train/policy_1_w": -159.10845947265625, "logps_train/policy_2_2": -140.89573669433594, "logps_train/policy_2_w": -182.62355041503906, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.5257976055145264, "rewards_train/1-l": -0.033650875091552734, "rewards_train/1-w": 1.0997000932693481, "rewards_train/2-2": 0.8877711296081543, "rewards_train/2-w": 0.8685052394866943, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.1333509683609009, "rewards_train/margins_1": 0.5739024877548218, "rewards_train/margins_2": 0.01926589012145996, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -167.13771057128906, "logps_train/policy_1_l": -143.77279663085938, "logps_train/policy_1_w": -147.17788696289062, "logps_train/policy_2_2": -146.30361938476562, "logps_train/policy_2_w": -172.89215087890625, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.7803695201873779, "rewards_train/1-l": 0.02213483303785324, "rewards_train/1-w": 1.2122883796691895, "rewards_train/2-2": 1.0079188346862793, "rewards_train/2-w": 0.7432061433792114, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.1901535466313362, "rewards_train/margins_1": 0.4319188594818115, "rewards_train/margins_2": 0.26471269130706787, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -157.979248046875, "logps_train/policy_1_l": -149.8570556640625, "logps_train/policy_1_w": -145.67642211914062, "logps_train/policy_2_2": -132.87477111816406, "logps_train/policy_2_w": -173.6650390625, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": 0.5102773904800415, "rewards_train/1-l": -0.08477799594402313, "rewards_train/1-w": 0.8858725428581238, "rewards_train/2-2": 0.8756086826324463, "rewards_train/2-w": 0.47841888666152954, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.4375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.9706505388021469, "rewards_train/margins_1": 0.3755951523780823, "rewards_train/margins_2": 0.39718979597091675, "step": 18 }, { "epoch": 0.05, "logps_train/policy_1_2": -207.72116088867188, "logps_train/policy_1_l": -143.0449981689453, "logps_train/policy_1_w": -150.1810302734375, "logps_train/policy_2_2": -180.78555297851562, "logps_train/policy_2_w": -174.07220458984375, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": 0.8950719833374023, "rewards_train/1-l": -0.007722724229097366, "rewards_train/1-w": 0.94674152135849, "rewards_train/2-2": 1.4503509998321533, "rewards_train/2-w": 0.5759822130203247, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.3125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 0.9544642455875874, "rewards_train/margins_1": 0.051669538021087646, "rewards_train/margins_2": 0.8743687868118286, "step": 18 }, { "epoch": 0.06, "logps_train/policy_1_2": -159.67413330078125, "logps_train/policy_1_l": -171.51895141601562, "logps_train/policy_1_w": -141.75282287597656, "logps_train/policy_2_2": -137.12847900390625, "logps_train/policy_2_w": -172.37118530273438, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.1976248025894165, "rewards_train/1-l": -0.1972072720527649, "rewards_train/1-w": 0.8876076936721802, "rewards_train/2-2": 0.42514047026634216, "rewards_train/2-w": 0.5648341178894043, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.084814965724945, "rewards_train/margins_1": 0.6899828910827637, "rewards_train/margins_2": -0.13969364762306213, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -210.0609130859375, "logps_train/policy_1_l": -141.690673828125, "logps_train/policy_1_w": -123.85653686523438, "logps_train/policy_2_2": -165.0074462890625, "logps_train/policy_2_w": -165.3431396484375, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.6860966682434082, "rewards_train/1-l": -0.07951648533344269, "rewards_train/1-w": 0.9518460035324097, "rewards_train/2-2": 1.3840222358703613, "rewards_train/2-w": 0.44732558727264404, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.0313624888658524, "rewards_train/margins_1": 0.26574933528900146, "rewards_train/margins_2": 0.9366966485977173, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -141.2137451171875, "logps_train/policy_1_l": -136.3370361328125, "logps_train/policy_1_w": -152.76531982421875, "logps_train/policy_2_2": -120.75940704345703, "logps_train/policy_2_w": -177.6362762451172, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": 0.06553961336612701, "rewards_train/1-l": -0.050207480788230896, "rewards_train/1-w": 0.8224912285804749, "rewards_train/2-2": 0.3377310633659363, "rewards_train/2-w": 0.3906683325767517, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 0.8726987093687057, "rewards_train/margins_1": 0.7569516152143478, "rewards_train/margins_2": -0.05293726921081543, "step": 19 }, { "epoch": 0.06, "logps_train/policy_1_2": -142.44972229003906, "logps_train/policy_1_l": -142.62747192382812, "logps_train/policy_1_w": -118.09568786621094, "logps_train/policy_2_2": -118.03511047363281, "logps_train/policy_2_w": -142.8878173828125, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.2946769595146179, "rewards_train/1-l": -0.18110577762126923, "rewards_train/1-w": 0.7257827520370483, "rewards_train/2-2": 0.6050822734832764, "rewards_train/2-w": 0.33641472458839417, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 0.9068885296583176, "rewards_train/margins_1": 0.4311057925224304, "rewards_train/margins_2": 0.2686675488948822, "step": 19 }, { "epoch": 0.06, "learning_rate": 2.9411764705882355e-06, "loss": 1.9816, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -192.03366088867188, "logps_train/policy_1_l": -182.18797302246094, "logps_train/policy_1_w": -161.45753479003906, "logps_train/policy_2_2": -167.45501708984375, "logps_train/policy_2_w": -190.83441162109375, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.8790566325187683, "rewards_train/1-l": -0.12280119955539703, "rewards_train/1-w": 1.3835442066192627, "rewards_train/2-2": 1.3837944269180298, "rewards_train/2-w": 0.9431197643280029, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 1.5063454061746597, "rewards_train/margins_1": 0.5044875741004944, "rewards_train/margins_2": 0.44067466259002686, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -108.74735260009766, "logps_train/policy_1_l": -123.47766876220703, "logps_train/policy_1_w": -105.34545135498047, "logps_train/policy_2_2": -95.0068359375, "logps_train/policy_2_w": -122.50506591796875, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": 0.3059287965297699, "rewards_train/1-l": -0.1707158237695694, "rewards_train/1-w": 0.4900640547275543, "rewards_train/2-2": 0.576659619808197, "rewards_train/2-w": 0.2174617499113083, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.4375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 0.6607798784971237, "rewards_train/margins_1": 0.18413525819778442, "rewards_train/margins_2": 0.35919786989688873, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -211.95941162109375, "logps_train/policy_1_l": -197.84786987304688, "logps_train/policy_1_w": -167.2411346435547, "logps_train/policy_2_2": -176.31434631347656, "logps_train/policy_2_w": -207.99757385253906, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 0.5899960994720459, "rewards_train/1-l": -0.09870275855064392, "rewards_train/1-w": 1.0385817289352417, "rewards_train/2-2": 1.2216906547546387, "rewards_train/2-w": 0.37407130002975464, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 1.1372844874858856, "rewards_train/margins_1": 0.4485856294631958, "rewards_train/margins_2": 0.847619354724884, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -124.25157165527344, "logps_train/policy_1_l": -145.51272583007812, "logps_train/policy_1_w": -115.1479263305664, "logps_train/policy_2_2": -105.53282928466797, "logps_train/policy_2_w": -135.74176025390625, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": 0.2961311936378479, "rewards_train/1-l": 0.058382548391819, "rewards_train/1-w": 0.9016622304916382, "rewards_train/2-2": 0.6271857023239136, "rewards_train/2-w": 0.5291452407836914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 0.8432796820998192, "rewards_train/margins_1": 0.6055310368537903, "rewards_train/margins_2": 0.09804046154022217, "step": 20 }, { "epoch": 0.06, "logps_train/policy_1_2": -199.98678588867188, "logps_train/policy_1_l": -204.92323303222656, "logps_train/policy_1_w": -188.01341247558594, "logps_train/policy_2_2": -163.84719848632812, "logps_train/policy_2_w": -229.90493774414062, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": 0.7949719429016113, "rewards_train/1-l": -0.15706932544708252, "rewards_train/1-w": 1.9800063371658325, "rewards_train/2-2": 1.6096150875091553, "rewards_train/2-w": 1.188363790512085, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 2.137075662612915, "rewards_train/margins_1": 1.1850343942642212, "rewards_train/margins_2": 0.4212512969970703, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -194.1468505859375, "logps_train/policy_1_l": -178.15054321289062, "logps_train/policy_1_w": -153.15359497070312, "logps_train/policy_2_2": -156.6849365234375, "logps_train/policy_2_w": -196.97268676757812, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": 0.5610963702201843, "rewards_train/1-l": -0.27833640575408936, "rewards_train/1-w": 1.5014376640319824, "rewards_train/2-2": 1.1756463050842285, "rewards_train/2-w": 0.8792950510978699, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7797740697860718, "rewards_train/margins_1": 0.9403412938117981, "rewards_train/margins_2": 0.29635125398635864, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -145.856201171875, "logps_train/policy_1_l": -147.14149475097656, "logps_train/policy_1_w": -139.35137939453125, "logps_train/policy_2_2": -122.18768310546875, "logps_train/policy_2_w": -166.9316864013672, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 0.5465096235275269, "rewards_train/1-l": -0.3351939022541046, "rewards_train/1-w": 1.0430841445922852, "rewards_train/2-2": 1.0003230571746826, "rewards_train/2-w": 0.5865195393562317, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.3782780468463898, "rewards_train/margins_1": 0.4965745210647583, "rewards_train/margins_2": 0.4138035178184509, "step": 21 }, { "epoch": 0.06, "logps_train/policy_1_2": -192.73056030273438, "logps_train/policy_1_l": -194.45291137695312, "logps_train/policy_1_w": -158.52252197265625, "logps_train/policy_2_2": -170.03500366210938, "logps_train/policy_2_w": -189.05441284179688, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.45038077235221863, "rewards_train/1-l": -0.37566155195236206, "rewards_train/1-w": 1.4723576307296753, "rewards_train/2-2": 0.7933734655380249, "rewards_train/2-w": 0.8546177744865417, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.8480191826820374, "rewards_train/margins_1": 1.0219768583774567, "rewards_train/margins_2": -0.061244308948516846, "step": 21 }, { "epoch": 0.07, "learning_rate": 3.2352941176470594e-06, "loss": 1.8545, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -143.62049865722656, "logps_train/policy_1_l": -126.88957214355469, "logps_train/policy_1_w": -101.57172393798828, "logps_train/policy_2_2": -127.7439956665039, "logps_train/policy_2_w": -117.37112426757812, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": 0.6781847476959229, "rewards_train/1-l": -0.26063740253448486, "rewards_train/1-w": 0.835796594619751, "rewards_train/2-2": 1.0535296201705933, "rewards_train/2-w": 0.4902319312095642, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 1.0964339971542358, "rewards_train/margins_1": 0.15761184692382812, "rewards_train/margins_2": 0.563297688961029, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -205.47019958496094, "logps_train/policy_1_l": -180.9793701171875, "logps_train/policy_1_w": -160.3289031982422, "logps_train/policy_2_2": -181.33126831054688, "logps_train/policy_2_w": -193.5710906982422, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": 0.8811060786247253, "rewards_train/1-l": -0.5768423080444336, "rewards_train/1-w": 1.378828525543213, "rewards_train/2-2": 1.3672642707824707, "rewards_train/2-w": 0.6991408467292786, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 1.9556708335876465, "rewards_train/margins_1": 0.49772244691848755, "rewards_train/margins_2": 0.6681234240531921, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -172.4950714111328, "logps_train/policy_1_l": -134.32456970214844, "logps_train/policy_1_w": -141.05992126464844, "logps_train/policy_2_2": -144.4185028076172, "logps_train/policy_2_w": -167.82701110839844, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.9570354223251343, "rewards_train/1-l": -0.2306991070508957, "rewards_train/1-w": 1.535414218902588, "rewards_train/2-2": 1.5179152488708496, "rewards_train/2-w": 0.958705723285675, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 1.7661133259534836, "rewards_train/margins_1": 0.5783787965774536, "rewards_train/margins_2": 0.5592095255851746, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -202.14820861816406, "logps_train/policy_1_l": -167.02761840820312, "logps_train/policy_1_w": -133.23187255859375, "logps_train/policy_2_2": -169.77696228027344, "logps_train/policy_2_w": -162.4818115234375, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": 0.7039291858673096, "rewards_train/1-l": -0.024247340857982635, "rewards_train/1-w": 1.1918506622314453, "rewards_train/2-2": 1.2129297256469727, "rewards_train/2-w": 0.7350227236747742, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 1.216098003089428, "rewards_train/margins_1": 0.48792147636413574, "rewards_train/margins_2": 0.4779070019721985, "step": 22 }, { "epoch": 0.07, "logps_train/policy_1_2": -155.79449462890625, "logps_train/policy_1_l": -161.13389587402344, "logps_train/policy_1_w": -148.94679260253906, "logps_train/policy_2_2": -129.35662841796875, "logps_train/policy_2_w": -184.30142211914062, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.4101989269256592, "rewards_train/1-l": -0.5794051885604858, "rewards_train/1-w": 1.2199692726135254, "rewards_train/2-2": 1.0235164165496826, "rewards_train/2-w": 0.39427119493484497, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 1.7993744611740112, "rewards_train/margins_1": 0.8097703456878662, "rewards_train/margins_2": 0.6292452216148376, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -110.0805892944336, "logps_train/policy_1_l": -105.71155548095703, "logps_train/policy_1_w": -76.36685180664062, "logps_train/policy_2_2": -97.56924438476562, "logps_train/policy_2_w": -90.69217681884766, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": 0.2638159394264221, "rewards_train/1-l": -0.43248388171195984, "rewards_train/1-w": 0.49121710658073425, "rewards_train/2-2": 0.5657324194908142, "rewards_train/2-w": 0.21535275876522064, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 0.9237009882926941, "rewards_train/margins_1": 0.22740116715431213, "rewards_train/margins_2": 0.35037966072559357, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -181.5237579345703, "logps_train/policy_1_l": -207.3393096923828, "logps_train/policy_1_w": -120.50857543945312, "logps_train/policy_2_2": -147.41796875, "logps_train/policy_2_w": -152.3957061767578, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.38864001631736755, "rewards_train/1-l": -0.6803670525550842, "rewards_train/1-w": 1.1359100341796875, "rewards_train/2-2": 1.3605470657348633, "rewards_train/2-w": 0.4498823583126068, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 1.8162770867347717, "rewards_train/margins_1": 0.74727001786232, "rewards_train/margins_2": 0.9106647074222565, "step": 23 }, { "epoch": 0.07, "logps_train/policy_1_2": -133.12478637695312, "logps_train/policy_1_l": -136.09022521972656, "logps_train/policy_1_w": -113.54501342773438, "logps_train/policy_2_2": -113.68113708496094, "logps_train/policy_2_w": -138.79920959472656, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.06482197344303131, "rewards_train/1-l": -0.29730382561683655, "rewards_train/1-w": 0.8802642822265625, "rewards_train/2-2": 0.32016807794570923, "rewards_train/2-w": 0.3052356243133545, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.4375, "rewards_train/margins": 1.177568107843399, "rewards_train/margins_1": 0.9450862556695938, "rewards_train/margins_2": 0.014932453632354736, "step": 23 }, { "epoch": 0.07, "learning_rate": 3.529411764705883e-06, "loss": 1.9512, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -154.988037109375, "logps_train/policy_1_l": -133.37440490722656, "logps_train/policy_1_w": -113.31239318847656, "logps_train/policy_2_2": -138.21435546875, "logps_train/policy_2_w": -135.88668823242188, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": 0.471312940120697, "rewards_train/1-l": -0.16590754687786102, "rewards_train/1-w": 0.9234479665756226, "rewards_train/2-2": 1.0282725095748901, "rewards_train/2-w": 0.35498303174972534, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.0893555134534836, "rewards_train/margins_1": 0.45213502645492554, "rewards_train/margins_2": 0.6732894778251648, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -141.24415588378906, "logps_train/policy_1_l": -196.9839324951172, "logps_train/policy_1_w": -145.3151397705078, "logps_train/policy_2_2": -110.7142105102539, "logps_train/policy_2_w": -187.68881225585938, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 0.301561176776886, "rewards_train/1-l": -0.5929237604141235, "rewards_train/1-w": 1.421025276184082, "rewards_train/2-2": 0.906703770160675, "rewards_train/2-w": 0.20011314749717712, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.0139490365982056, "rewards_train/margins_1": 1.119464099407196, "rewards_train/margins_2": 0.7065906226634979, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -187.6585693359375, "logps_train/policy_1_l": -137.0457000732422, "logps_train/policy_1_w": -134.7988739013672, "logps_train/policy_2_2": -151.39529418945312, "logps_train/policy_2_w": -170.77783203125, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": 0.37398797273635864, "rewards_train/1-l": -0.5205124020576477, "rewards_train/1-w": 1.1802690029144287, "rewards_train/2-2": 1.3301959037780762, "rewards_train/2-w": 0.23178738355636597, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.7007814049720764, "rewards_train/margins_1": 0.8062810301780701, "rewards_train/margins_2": 1.0984085202217102, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -185.27764892578125, "logps_train/policy_1_l": -144.4807586669922, "logps_train/policy_1_w": -127.20216369628906, "logps_train/policy_2_2": -154.6812744140625, "logps_train/policy_2_w": -157.60633850097656, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": 0.45075106620788574, "rewards_train/1-l": -0.4540814757347107, "rewards_train/1-w": 0.7680652141571045, "rewards_train/2-2": 1.1141972541809082, "rewards_train/2-w": 0.1710061877965927, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 1.2221466898918152, "rewards_train/margins_1": 0.31731414794921875, "rewards_train/margins_2": 0.9431910663843155, "step": 24 }, { "epoch": 0.07, "logps_train/policy_1_2": -123.64697265625, "logps_train/policy_1_l": -118.55958557128906, "logps_train/policy_1_w": -86.32027435302734, "logps_train/policy_2_2": -96.80755615234375, "logps_train/policy_2_w": -112.18669128417969, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": 0.19829107820987701, "rewards_train/1-l": -0.9795916080474854, "rewards_train/1-w": 0.7431677579879761, "rewards_train/2-2": 0.9876524806022644, "rewards_train/2-w": 0.3192211985588074, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.5, "rewards_train/margins": 1.7227593660354614, "rewards_train/margins_1": 0.5448766797780991, "rewards_train/margins_2": 0.668431282043457, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -92.24092102050781, "logps_train/policy_1_l": -120.70304870605469, "logps_train/policy_1_w": -114.09434509277344, "logps_train/policy_2_2": -78.06423950195312, "logps_train/policy_2_w": -138.6539306640625, "logps_train/ref_1_2": -91.5, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -81.5, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -0.07213891297578812, "rewards_train/1-l": -0.6741138100624084, "rewards_train/1-w": 0.9544323086738586, "rewards_train/2-2": 0.3474823832511902, "rewards_train/2-w": 0.39652180671691895, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 1.628546118736267, "rewards_train/margins_1": 1.0265712216496468, "rewards_train/margins_2": -0.04903942346572876, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -210.87252807617188, "logps_train/policy_1_l": -170.7855682373047, "logps_train/policy_1_w": -145.64337158203125, "logps_train/policy_2_2": -167.8146209716797, "logps_train/policy_2_w": -194.86956787109375, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.2629433274269104, "rewards_train/1-l": -0.7812414765357971, "rewards_train/1-w": 1.4192564487457275, "rewards_train/2-2": 1.5439282655715942, "rewards_train/2-w": 0.32866889238357544, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.2004979252815247, "rewards_train/margins_1": 1.1563131213188171, "rewards_train/margins_2": 1.2152593731880188, "step": 25 }, { "epoch": 0.07, "logps_train/policy_1_2": -122.70750427246094, "logps_train/policy_1_l": -122.99543762207031, "logps_train/policy_1_w": -122.75062561035156, "logps_train/policy_2_2": -99.42222595214844, "logps_train/policy_2_w": -141.0682830810547, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": 0.48218008875846863, "rewards_train/1-l": -0.3401693105697632, "rewards_train/1-w": 0.963610053062439, "rewards_train/2-2": 1.0028947591781616, "rewards_train/2-w": 0.473640114068985, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.3037793636322021, "rewards_train/margins_1": 0.48142996430397034, "rewards_train/margins_2": 0.5292546451091766, "step": 25 }, { "epoch": 0.08, "learning_rate": 3.8235294117647055e-06, "loss": 1.805, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -136.80062866210938, "logps_train/policy_1_l": -140.57913208007812, "logps_train/policy_1_w": -129.8925018310547, "logps_train/policy_2_2": -112.62481689453125, "logps_train/policy_2_w": -159.22830200195312, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.3855624198913574, "rewards_train/1-l": -0.7750512361526489, "rewards_train/1-w": 1.0933668613433838, "rewards_train/2-2": 1.0843925476074219, "rewards_train/2-w": -0.07600328326225281, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8684180974960327, "rewards_train/margins_1": 0.7078044414520264, "rewards_train/margins_2": 1.1603958308696747, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -109.02120971679688, "logps_train/policy_1_l": -91.4888916015625, "logps_train/policy_1_w": -69.29712677001953, "logps_train/policy_2_2": -84.84008026123047, "logps_train/policy_2_w": -93.0064926147461, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": 0.19866012036800385, "rewards_train/1-l": -0.5602172613143921, "rewards_train/1-w": 0.19997522234916687, "rewards_train/2-2": 0.9392343759536743, "rewards_train/2-w": -0.5143211483955383, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 0.760192483663559, "rewards_train/margins_1": 0.001315101981163025, "rewards_train/margins_2": 1.4535555243492126, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -141.2210693359375, "logps_train/policy_1_l": -125.02693176269531, "logps_train/policy_1_w": -117.93339538574219, "logps_train/policy_2_2": -112.63008117675781, "logps_train/policy_2_w": -152.34471130371094, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.28179946541786194, "rewards_train/1-l": -0.7428790330886841, "rewards_train/1-w": 1.129122018814087, "rewards_train/2-2": 1.1373822689056396, "rewards_train/2-w": -0.006931483745574951, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.872001051902771, "rewards_train/margins_1": 0.847322553396225, "rewards_train/margins_2": 1.1443137526512146, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -215.18572998046875, "logps_train/policy_1_l": -194.77810668945312, "logps_train/policy_1_w": -176.82815551757812, "logps_train/policy_2_2": -179.10760498046875, "logps_train/policy_2_w": -216.5306396484375, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": 0.2525200843811035, "rewards_train/1-l": -1.213063359260559, "rewards_train/1-w": 1.1607383489608765, "rewards_train/2-2": 1.3197095394134521, "rewards_train/2-w": -0.4069689214229584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3738017082214355, "rewards_train/margins_1": 0.908218264579773, "rewards_train/margins_2": 1.7266784608364105, "step": 26 }, { "epoch": 0.08, "logps_train/policy_1_2": -171.38742065429688, "logps_train/policy_1_l": -146.73793029785156, "logps_train/policy_1_w": -121.58975982666016, "logps_train/policy_2_2": -139.13592529296875, "logps_train/policy_2_w": -155.2979278564453, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": 0.4307898283004761, "rewards_train/1-l": -1.2980120182037354, "rewards_train/1-w": 1.2961022853851318, "rewards_train/2-2": 1.321563482284546, "rewards_train/2-w": 0.15712100267410278, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.594114303588867, "rewards_train/margins_1": 0.8653124570846558, "rewards_train/margins_2": 1.1644424796104431, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -167.16421508789062, "logps_train/policy_1_l": -166.86343383789062, "logps_train/policy_1_w": -114.97834777832031, "logps_train/policy_2_2": -128.64117431640625, "logps_train/policy_2_w": -150.66152954101562, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": 0.1824072003364563, "rewards_train/1-l": -1.0455710887908936, "rewards_train/1-w": 1.1451337337493896, "rewards_train/2-2": 1.5084415674209595, "rewards_train/2-w": -0.18568378686904907, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.190704822540283, "rewards_train/margins_1": 0.9627265334129333, "rewards_train/margins_2": 1.6941253542900085, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -175.3022918701172, "logps_train/policy_1_l": -131.08558654785156, "logps_train/policy_1_w": -125.30125427246094, "logps_train/policy_2_2": -136.159423828125, "logps_train/policy_2_w": -167.80508422851562, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.09312009811401367, "rewards_train/1-l": -0.6213533282279968, "rewards_train/1-w": 0.9112796783447266, "rewards_train/2-2": 1.5220462083816528, "rewards_train/2-w": -0.3719153106212616, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 1.5326330065727234, "rewards_train/margins_1": 1.0043997764587402, "rewards_train/margins_2": 1.8939615190029144, "step": 27 }, { "epoch": 0.08, "logps_train/policy_1_2": -167.4500274658203, "logps_train/policy_1_l": -120.95188903808594, "logps_train/policy_1_w": -143.97705078125, "logps_train/policy_2_2": -129.13092041015625, "logps_train/policy_2_w": -183.62733459472656, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": 0.0065593719482421875, "rewards_train/1-l": -0.7299544811248779, "rewards_train/1-w": 1.5269054174423218, "rewards_train/2-2": 1.0665948390960693, "rewards_train/2-w": 0.09468874335289001, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.2568598985671997, "rewards_train/margins_1": 1.5203460454940796, "rewards_train/margins_2": 0.9719060957431793, "step": 27 }, { "epoch": 0.08, "learning_rate": 4.11764705882353e-06, "loss": 1.7311, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -112.80885314941406, "logps_train/policy_1_l": -96.09354400634766, "logps_train/policy_1_w": -110.24530029296875, "logps_train/policy_2_2": -92.76618957519531, "logps_train/policy_2_w": -134.8367462158203, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -89.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -0.09461209923028946, "rewards_train/1-l": -0.6818946599960327, "rewards_train/1-w": 1.2239080667495728, "rewards_train/2-2": 0.7248377799987793, "rewards_train/2-w": 0.35324007272720337, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.9058027267456055, "rewards_train/margins_1": 1.3185201659798622, "rewards_train/margins_2": 0.3715977072715759, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -136.9502716064453, "logps_train/policy_1_l": -154.43014526367188, "logps_train/policy_1_w": -130.06134033203125, "logps_train/policy_2_2": -108.46681213378906, "logps_train/policy_2_w": -159.1690673828125, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": 0.5221607685089111, "rewards_train/1-l": -1.198874592781067, "rewards_train/1-w": 1.2931829690933228, "rewards_train/2-2": 1.329880714416504, "rewards_train/2-w": 0.06766348332166672, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.4920575618743896, "rewards_train/margins_1": 0.7710222005844116, "rewards_train/margins_2": 1.2622172310948372, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -106.58921813964844, "logps_train/policy_1_l": -122.33226013183594, "logps_train/policy_1_w": -98.6041488647461, "logps_train/policy_2_2": -85.11920928955078, "logps_train/policy_2_w": -136.96563720703125, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 0.022327736020088196, "rewards_train/1-l": -0.7637922763824463, "rewards_train/1-w": 0.8261085748672485, "rewards_train/2-2": 0.7949148416519165, "rewards_train/2-w": -0.18172121047973633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 1.5899008512496948, "rewards_train/margins_1": 0.8037808388471603, "rewards_train/margins_2": 0.9766360521316528, "step": 28 }, { "epoch": 0.08, "logps_train/policy_1_2": -176.85574340820312, "logps_train/policy_1_l": -209.51171875, "logps_train/policy_1_w": -165.04278564453125, "logps_train/policy_2_2": -147.46966552734375, "logps_train/policy_2_w": -197.4061737060547, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": 0.2277066707611084, "rewards_train/1-l": -1.5783684253692627, "rewards_train/1-w": 1.2575862407684326, "rewards_train/2-2": 1.3786190748214722, "rewards_train/2-w": -0.06434763967990875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.8359546661376953, "rewards_train/margins_1": 1.0298795700073242, "rewards_train/margins_2": 1.442966714501381, "step": 28 }, { "epoch": 0.09, "logps_train/policy_1_2": -152.5701446533203, "logps_train/policy_1_l": -118.50529479980469, "logps_train/policy_1_w": -89.0992431640625, "logps_train/policy_2_2": -110.84495544433594, "logps_train/policy_2_w": -113.39144897460938, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": -0.45779499411582947, "rewards_train/1-l": -1.2033122777938843, "rewards_train/1-w": 0.3726936876773834, "rewards_train/2-2": 1.198512077331543, "rewards_train/2-w": -0.3643406629562378, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 1.5760059654712677, "rewards_train/margins_1": 0.8304886817932129, "rewards_train/margins_2": 1.5628527402877808, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -140.7069549560547, "logps_train/policy_1_l": -115.88337707519531, "logps_train/policy_1_w": -97.86322021484375, "logps_train/policy_2_2": -106.73930358886719, "logps_train/policy_2_w": -124.7589111328125, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -0.39672142267227173, "rewards_train/1-l": -1.1052809953689575, "rewards_train/1-w": 1.21992826461792, "rewards_train/2-2": 1.0389118194580078, "rewards_train/2-w": 0.20692208409309387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 2.3252092599868774, "rewards_train/margins_1": 1.6166496872901917, "rewards_train/margins_2": 0.8319897353649139, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -122.17267608642578, "logps_train/policy_1_l": -136.80018615722656, "logps_train/policy_1_w": -99.93553924560547, "logps_train/policy_2_2": -96.55896759033203, "logps_train/policy_2_w": -126.74958801269531, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": -0.28113552927970886, "rewards_train/1-l": -1.310706615447998, "rewards_train/1-w": 1.035352110862732, "rewards_train/2-2": 0.6686150431632996, "rewards_train/2-w": 0.1601974070072174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 2.34605872631073, "rewards_train/margins_1": 1.3164876401424408, "rewards_train/margins_2": 0.5084176361560822, "step": 29 }, { "epoch": 0.09, "logps_train/policy_1_2": -198.45559692382812, "logps_train/policy_1_l": -251.5771484375, "logps_train/policy_1_w": -134.56051635742188, "logps_train/policy_2_2": -158.11500549316406, "logps_train/policy_2_w": -179.15399169921875, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": 0.06947837769985199, "rewards_train/1-l": -2.5272464752197266, "rewards_train/1-w": 1.3566433191299438, "rewards_train/2-2": 1.4224836826324463, "rewards_train/2-w": -0.40856286883354187, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8838897943496704, "rewards_train/margins_1": 1.2871649414300919, "rewards_train/margins_2": 1.8310465514659882, "step": 29 }, { "epoch": 0.09, "learning_rate": 4.411764705882353e-06, "loss": 1.7186, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -138.6177978515625, "logps_train/policy_1_l": -131.5700225830078, "logps_train/policy_1_w": -134.5953369140625, "logps_train/policy_2_2": -100.61493682861328, "logps_train/policy_2_w": -175.6536102294922, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.5660780072212219, "rewards_train/1-l": -1.7676469087600708, "rewards_train/1-w": 1.5224974155426025, "rewards_train/2-2": 0.8426081538200378, "rewards_train/2-w": -0.1696581244468689, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2901443243026733, "rewards_train/margins_1": 2.0885754227638245, "rewards_train/margins_2": 1.0122662782669067, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -122.07783508300781, "logps_train/policy_1_l": -138.670654296875, "logps_train/policy_1_w": -95.89698791503906, "logps_train/policy_2_2": -87.16453552246094, "logps_train/policy_2_w": -128.9364013671875, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": -0.2800493836402893, "rewards_train/1-l": -1.140258550643921, "rewards_train/1-w": 0.2989731729030609, "rewards_train/2-2": 0.902687668800354, "rewards_train/2-w": -1.0487169027328491, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.4392317235469818, "rewards_train/margins_1": 0.5790225565433502, "rewards_train/margins_2": 1.9514045715332031, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -166.65521240234375, "logps_train/policy_1_l": -144.36647033691406, "logps_train/policy_1_w": -100.43103790283203, "logps_train/policy_2_2": -128.02488708496094, "logps_train/policy_2_w": -126.44790649414062, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -0.2112237811088562, "rewards_train/1-l": -1.000563383102417, "rewards_train/1-w": 1.027208685874939, "rewards_train/2-2": 1.0768078565597534, "rewards_train/2-w": -0.09240974485874176, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.027772068977356, "rewards_train/margins_1": 1.2384324669837952, "rewards_train/margins_2": 1.1692176014184952, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -152.64498901367188, "logps_train/policy_1_l": -149.5825653076172, "logps_train/policy_1_w": -109.86134338378906, "logps_train/policy_2_2": -115.6037826538086, "logps_train/policy_2_w": -155.86761474609375, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.6672327518463135, "rewards_train/1-l": -1.345170259475708, "rewards_train/1-w": 0.5171863436698914, "rewards_train/2-2": 0.7169655561447144, "rewards_train/2-w": -1.014886736869812, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.8623566031455994, "rewards_train/margins_1": 1.1844190955162048, "rewards_train/margins_2": 1.7318522930145264, "step": 30 }, { "epoch": 0.09, "logps_train/policy_1_2": -159.80325317382812, "logps_train/policy_1_l": -140.95396423339844, "logps_train/policy_1_w": -129.51284790039062, "logps_train/policy_2_2": -125.16991424560547, "logps_train/policy_2_w": -159.92752075195312, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.19165247678756714, "rewards_train/1-l": -1.3282086849212646, "rewards_train/1-w": 1.1591633558273315, "rewards_train/2-2": 1.2837903499603271, "rewards_train/2-w": -0.09704825282096863, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.487372040748596, "rewards_train/margins_1": 1.3508158326148987, "rewards_train/margins_2": 1.3808386027812958, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -214.32794189453125, "logps_train/policy_1_l": -226.067626953125, "logps_train/policy_1_w": -177.88150024414062, "logps_train/policy_2_2": -167.16551208496094, "logps_train/policy_2_w": -223.9621124267578, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -0.8325995802879333, "rewards_train/1-l": -2.476097822189331, "rewards_train/1-w": 1.3641929626464844, "rewards_train/2-2": 1.492628812789917, "rewards_train/2-w": -0.7548047304153442, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.8402907848358154, "rewards_train/margins_1": 2.1967925429344177, "rewards_train/margins_2": 2.2474335432052612, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -166.6395263671875, "logps_train/policy_1_l": -182.1953582763672, "logps_train/policy_1_w": -139.91806030273438, "logps_train/policy_2_2": -138.4232177734375, "logps_train/policy_2_w": -180.0794677734375, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.14212605357170105, "rewards_train/1-l": -1.8852832317352295, "rewards_train/1-w": 1.1579504013061523, "rewards_train/2-2": 1.0047494173049927, "rewards_train/2-w": -0.3747430443763733, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.5625, "rewards_train/margins": 3.043233633041382, "rewards_train/margins_1": 1.3000764548778534, "rewards_train/margins_2": 1.379492461681366, "step": 31 }, { "epoch": 0.09, "logps_train/policy_1_2": -248.53823852539062, "logps_train/policy_1_l": -236.79910278320312, "logps_train/policy_1_w": -178.67398071289062, "logps_train/policy_2_2": -193.822021484375, "logps_train/policy_2_w": -241.62130737304688, "logps_train/ref_1_2": -243.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -233.0, "rewards_train/1-2": -0.5530432462692261, "rewards_train/1-l": -3.6517858505249023, "rewards_train/1-w": 1.8708815574645996, "rewards_train/2-2": 1.5763920545578003, "rewards_train/2-w": -0.8449422121047974, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.522667407989502, "rewards_train/margins_1": 2.4239248037338257, "rewards_train/margins_2": 2.4213342666625977, "step": 31 }, { "epoch": 0.1, "learning_rate": 4.705882352941177e-06, "loss": 1.7583, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -172.16262817382812, "logps_train/policy_1_l": -131.91053771972656, "logps_train/policy_1_w": -102.832763671875, "logps_train/policy_2_2": -124.62992095947266, "logps_train/policy_2_w": -138.82266235351562, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": -0.6103055477142334, "rewards_train/1-l": -1.3741841316223145, "rewards_train/1-w": 0.4756110608577728, "rewards_train/2-2": 1.7623004913330078, "rewards_train/2-w": -1.0472064018249512, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8497951924800873, "rewards_train/margins_1": 1.0859166085720062, "rewards_train/margins_2": 2.809506893157959, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -147.49729919433594, "logps_train/policy_1_l": -116.495361328125, "logps_train/policy_1_w": -105.75650024414062, "logps_train/policy_2_2": -116.34524536132812, "logps_train/policy_2_w": -136.06790161132812, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.1694105565547943, "rewards_train/1-l": -1.1595947742462158, "rewards_train/1-w": 1.2507166862487793, "rewards_train/2-2": 1.4873498678207397, "rewards_train/2-w": -0.1562030017375946, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.410311460494995, "rewards_train/margins_1": 1.081306129693985, "rewards_train/margins_2": 1.6435528695583344, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -235.3975830078125, "logps_train/policy_1_l": -173.0066375732422, "logps_train/policy_1_w": -177.29238891601562, "logps_train/policy_2_2": -177.13674926757812, "logps_train/policy_2_w": -246.59620666503906, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": -0.8553840517997742, "rewards_train/1-l": -1.642949104309082, "rewards_train/1-w": 2.4791595935821533, "rewards_train/2-2": 1.6992158889770508, "rewards_train/2-w": -0.8242696523666382, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.122108697891235, "rewards_train/margins_1": 3.3345436453819275, "rewards_train/margins_2": 2.523485541343689, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -151.51101684570312, "logps_train/policy_1_l": -159.16746520996094, "logps_train/policy_1_w": -132.7335205078125, "logps_train/policy_2_2": -114.5001220703125, "logps_train/policy_2_w": -173.56851196289062, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.8218063712120056, "rewards_train/1-l": -1.5710439682006836, "rewards_train/1-w": 1.322740912437439, "rewards_train/2-2": 0.7849481105804443, "rewards_train/2-w": -0.41466224193573, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.8937848806381226, "rewards_train/margins_1": 2.1445472836494446, "rewards_train/margins_2": 1.1996103525161743, "step": 32 }, { "epoch": 0.1, "logps_train/policy_1_2": -186.2440643310547, "logps_train/policy_1_l": -204.15045166015625, "logps_train/policy_1_w": -130.40658569335938, "logps_train/policy_2_2": -145.6839599609375, "logps_train/policy_2_w": -177.9100341796875, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.550187885761261, "rewards_train/1-l": -2.137115478515625, "rewards_train/1-w": 1.3266253471374512, "rewards_train/2-2": 1.1550405025482178, "rewards_train/2-w": -0.38192182779312134, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.463740825653076, "rewards_train/margins_1": 1.8768132328987122, "rewards_train/margins_2": 1.5369623303413391, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -209.70364379882812, "logps_train/policy_1_l": -150.93287658691406, "logps_train/policy_1_w": -147.69174194335938, "logps_train/policy_2_2": -159.69906616210938, "logps_train/policy_2_w": -198.20953369140625, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -0.5445831418037415, "rewards_train/1-l": -1.5689220428466797, "rewards_train/1-w": 2.187952995300293, "rewards_train/2-2": 1.9535300731658936, "rewards_train/2-w": -0.24634476006031036, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.7568750381469727, "rewards_train/margins_1": 2.7325361371040344, "rewards_train/margins_2": 2.199874833226204, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -207.10650634765625, "logps_train/policy_1_l": -190.43856811523438, "logps_train/policy_1_w": -163.05279541015625, "logps_train/policy_2_2": -154.67984008789062, "logps_train/policy_2_w": -201.79568481445312, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.3671951293945312, "rewards_train/1-l": -2.623202323913574, "rewards_train/1-w": 1.3189395666122437, "rewards_train/2-2": 1.2394071817398071, "rewards_train/2-w": -0.2524208426475525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.942141890525818, "rewards_train/margins_1": 2.686134696006775, "rewards_train/margins_2": 1.4918280243873596, "step": 33 }, { "epoch": 0.1, "logps_train/policy_1_2": -159.17068481445312, "logps_train/policy_1_l": -142.6903839111328, "logps_train/policy_1_w": -133.53744506835938, "logps_train/policy_2_2": -123.64388275146484, "logps_train/policy_2_w": -173.11480712890625, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.808474063873291, "rewards_train/1-l": -2.025679588317871, "rewards_train/1-w": 1.016763687133789, "rewards_train/2-2": 0.9979165196418762, "rewards_train/2-w": -0.441949725151062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.04244327545166, "rewards_train/margins_1": 1.82523775100708, "rewards_train/margins_2": 1.4398662447929382, "step": 33 }, { "epoch": 0.1, "learning_rate": 5e-06, "loss": 1.4704, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -184.28024291992188, "logps_train/policy_1_l": -162.3912811279297, "logps_train/policy_1_w": -164.89549255371094, "logps_train/policy_2_2": -150.29852294921875, "logps_train/policy_2_w": -220.773193359375, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -0.5248997211456299, "rewards_train/1-l": -1.5022141933441162, "rewards_train/1-w": 1.673732042312622, "rewards_train/2-2": 0.9806936979293823, "rewards_train/2-w": -1.0029051303863525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1759462356567383, "rewards_train/margins_1": 2.198631763458252, "rewards_train/margins_2": 1.9835988283157349, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -164.31228637695312, "logps_train/policy_1_l": -171.5739288330078, "logps_train/policy_1_w": -132.44317626953125, "logps_train/policy_2_2": -131.88589477539062, "logps_train/policy_2_w": -169.99282836914062, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -0.7788852453231812, "rewards_train/1-l": -2.3896193504333496, "rewards_train/1-w": 1.081658124923706, "rewards_train/2-2": 0.9317231178283691, "rewards_train/2-w": -0.7125642895698547, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.4712774753570557, "rewards_train/margins_1": 1.8605433702468872, "rewards_train/margins_2": 1.6442874073982239, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -113.99537658691406, "logps_train/policy_1_l": -93.92823028564453, "logps_train/policy_1_w": -80.17121887207031, "logps_train/policy_2_2": -92.04271697998047, "logps_train/policy_2_w": -109.69639587402344, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": -0.23508526384830475, "rewards_train/1-l": -1.193408489227295, "rewards_train/1-w": 0.7473315000534058, "rewards_train/2-2": 0.8533456325531006, "rewards_train/2-w": -0.4200305640697479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.9407399892807007, "rewards_train/margins_1": 0.9824167639017105, "rewards_train/margins_2": 1.2733761966228485, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -184.96133422851562, "logps_train/policy_1_l": -153.25326538085938, "logps_train/policy_1_w": -120.92060089111328, "logps_train/policy_2_2": -145.8382110595703, "logps_train/policy_2_w": -156.99752807617188, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.06265555322170258, "rewards_train/1-l": -1.4278647899627686, "rewards_train/1-w": 1.0886037349700928, "rewards_train/2-2": 1.853091835975647, "rewards_train/2-w": -0.510007381439209, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.5164685249328613, "rewards_train/margins_1": 1.0259481817483902, "rewards_train/margins_2": 2.363099217414856, "step": 34 }, { "epoch": 0.1, "logps_train/policy_1_2": -170.6981201171875, "logps_train/policy_1_l": -130.31573486328125, "logps_train/policy_1_w": -116.74442291259766, "logps_train/policy_2_2": -126.95836639404297, "logps_train/policy_2_w": -153.28977966308594, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -0.9955936670303345, "rewards_train/1-l": -2.0962228775024414, "rewards_train/1-w": 1.4310262203216553, "rewards_train/2-2": 1.1986942291259766, "rewards_train/2-w": -0.21960289776325226, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.5272490978240967, "rewards_train/margins_1": 2.4266198873519897, "rewards_train/margins_2": 1.4182971268892288, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -173.392822265625, "logps_train/policy_1_l": -185.50808715820312, "logps_train/policy_1_w": -160.9181671142578, "logps_train/policy_2_2": -132.50970458984375, "logps_train/policy_2_w": -215.6864776611328, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -0.9032852649688721, "rewards_train/1-l": -2.6227283477783203, "rewards_train/1-w": 1.0694981813430786, "rewards_train/2-2": 1.08137845993042, "rewards_train/2-w": -1.558760643005371, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.692226529121399, "rewards_train/margins_1": 1.9727834463119507, "rewards_train/margins_2": 2.640139102935791, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -199.63699340820312, "logps_train/policy_1_l": -270.30377197265625, "logps_train/policy_1_w": -157.88809204101562, "logps_train/policy_2_2": -160.2286376953125, "logps_train/policy_2_w": -199.69448852539062, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.5548137426376343, "rewards_train/1-l": -4.182380199432373, "rewards_train/1-w": 1.2244714498519897, "rewards_train/2-2": 1.2359259128570557, "rewards_train/2-w": -0.9006972312927246, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.406851649284363, "rewards_train/margins_1": 1.779285192489624, "rewards_train/margins_2": 2.1366231441497803, "step": 35 }, { "epoch": 0.1, "logps_train/policy_1_2": -162.5793914794922, "logps_train/policy_1_l": -240.05401611328125, "logps_train/policy_1_w": -116.09918975830078, "logps_train/policy_2_2": -120.90316772460938, "logps_train/policy_2_w": -147.7531280517578, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -1.1667284965515137, "rewards_train/1-l": -3.8192691802978516, "rewards_train/1-w": 0.9154713153839111, "rewards_train/2-2": 1.0749173164367676, "rewards_train/2-w": -0.5971879959106445, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.734740495681763, "rewards_train/margins_1": 2.082199811935425, "rewards_train/margins_2": 1.672105312347412, "step": 35 }, { "epoch": 0.11, "learning_rate": 4.999878002203615e-06, "loss": 1.5557, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -163.07403564453125, "logps_train/policy_1_l": -189.60556030273438, "logps_train/policy_1_w": -141.515380859375, "logps_train/policy_2_2": -120.36651611328125, "logps_train/policy_2_w": -178.295654296875, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.263653963804245, "rewards_train/1-l": -2.4339945316314697, "rewards_train/1-w": 1.1799076795578003, "rewards_train/2-2": 1.7624691724777222, "rewards_train/2-w": -0.7897218465805054, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.61390221118927, "rewards_train/margins_1": 1.4435616433620453, "rewards_train/margins_2": 2.5521910190582275, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -236.22198486328125, "logps_train/policy_1_l": -219.88088989257812, "logps_train/policy_1_w": -152.85365295410156, "logps_train/policy_2_2": -175.07577514648438, "logps_train/policy_2_w": -216.3704376220703, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.3530560731887817, "rewards_train/1-l": -2.0243701934814453, "rewards_train/1-w": 1.570103645324707, "rewards_train/2-2": 1.3455485105514526, "rewards_train/2-w": -0.8229799866676331, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5944738388061523, "rewards_train/margins_1": 2.9231597185134888, "rewards_train/margins_2": 2.1685284972190857, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -168.6816864013672, "logps_train/policy_1_l": -140.28915405273438, "logps_train/policy_1_w": -136.00625610351562, "logps_train/policy_2_2": -127.15255737304688, "logps_train/policy_2_w": -179.87840270996094, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.0632858276367188, "rewards_train/1-l": -1.3132898807525635, "rewards_train/1-w": 1.101717233657837, "rewards_train/2-2": 1.028494119644165, "rewards_train/2-w": -0.9574690461158752, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4150071144104004, "rewards_train/margins_1": 2.1650030612945557, "rewards_train/margins_2": 1.9859631657600403, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -175.6721649169922, "logps_train/policy_1_l": -144.26165771484375, "logps_train/policy_1_w": -143.96383666992188, "logps_train/policy_2_2": -136.23902893066406, "logps_train/policy_2_w": -187.0314178466797, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -0.8289344310760498, "rewards_train/1-l": -2.109739303588867, "rewards_train/1-w": 1.4551777839660645, "rewards_train/2-2": 0.9159413576126099, "rewards_train/2-w": -0.503140926361084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5649170875549316, "rewards_train/margins_1": 2.2841122150421143, "rewards_train/margins_2": 1.4190822839736938, "step": 36 }, { "epoch": 0.11, "logps_train/policy_1_2": -217.56321716308594, "logps_train/policy_1_l": -158.30044555664062, "logps_train/policy_1_w": -108.78678131103516, "logps_train/policy_2_2": -158.11007690429688, "logps_train/policy_2_w": -155.49459838867188, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -1.9485094547271729, "rewards_train/1-l": -1.9519197940826416, "rewards_train/1-w": 0.7866541147232056, "rewards_train/2-2": 1.006960391998291, "rewards_train/2-w": -1.4244606494903564, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.738573908805847, "rewards_train/margins_1": 2.7351635694503784, "rewards_train/margins_2": 2.4314210414886475, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -174.35655212402344, "logps_train/policy_1_l": -194.34243774414062, "logps_train/policy_1_w": -108.4437255859375, "logps_train/policy_2_2": -133.12387084960938, "logps_train/policy_2_w": -139.154296875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -0.8085063695907593, "rewards_train/1-l": -2.479848861694336, "rewards_train/1-w": 0.9073855876922607, "rewards_train/2-2": 1.3602678775787354, "rewards_train/2-w": -0.4751588702201843, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3872344493865967, "rewards_train/margins_1": 1.71589195728302, "rewards_train/margins_2": 1.8354267477989197, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -173.06356811523438, "logps_train/policy_1_l": -124.14569091796875, "logps_train/policy_1_w": -145.35577392578125, "logps_train/policy_2_2": -129.48924255371094, "logps_train/policy_2_w": -189.78892517089844, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.8575273156166077, "rewards_train/1-l": -1.240691900253296, "rewards_train/1-w": 1.0841491222381592, "rewards_train/2-2": 1.0122090578079224, "rewards_train/2-w": -1.120982050895691, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.324841022491455, "rewards_train/margins_1": 1.9416764378547668, "rewards_train/margins_2": 2.1331911087036133, "step": 37 }, { "epoch": 0.11, "logps_train/policy_1_2": -196.77598571777344, "logps_train/policy_1_l": -129.5210418701172, "logps_train/policy_1_w": -105.9070053100586, "logps_train/policy_2_2": -147.63748168945312, "logps_train/policy_2_w": -141.00552368164062, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -1.2061131000518799, "rewards_train/1-l": -1.707963228225708, "rewards_train/1-w": 1.1950418949127197, "rewards_train/2-2": 1.6397674083709717, "rewards_train/2-w": -0.6107087135314941, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.9030051231384277, "rewards_train/margins_1": 2.4011549949645996, "rewards_train/margins_2": 2.250476121902466, "step": 37 }, { "epoch": 0.11, "learning_rate": 4.9995120207212275e-06, "loss": 1.4755, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -165.83363342285156, "logps_train/policy_1_l": -176.86328125, "logps_train/policy_1_w": -117.44995880126953, "logps_train/policy_2_2": -130.43484497070312, "logps_train/policy_2_w": -156.9458770751953, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.6958634853363037, "rewards_train/1-l": -2.6157217025756836, "rewards_train/1-w": 0.831566572189331, "rewards_train/2-2": 1.1815149784088135, "rewards_train/2-w": -0.9281824231147766, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.4472882747650146, "rewards_train/margins_1": 1.5274300575256348, "rewards_train/margins_2": 2.10969740152359, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -258.7195129394531, "logps_train/policy_1_l": -255.7557373046875, "logps_train/policy_1_w": -190.53421020507812, "logps_train/policy_2_2": -201.36441040039062, "logps_train/policy_2_w": -253.8151397705078, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -219.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": -1.0106244087219238, "rewards_train/1-l": -3.688173294067383, "rewards_train/1-w": 2.157517194747925, "rewards_train/2-2": 1.7039878368377686, "rewards_train/2-w": -1.0049502849578857, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.845690488815308, "rewards_train/margins_1": 3.1681416034698486, "rewards_train/margins_2": 2.7089381217956543, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -186.11386108398438, "logps_train/policy_1_l": -166.89244079589844, "logps_train/policy_1_w": -143.92034912109375, "logps_train/policy_2_2": -139.64260864257812, "logps_train/policy_2_w": -201.2754364013672, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.542489767074585, "rewards_train/1-l": -2.451256275177002, "rewards_train/1-w": 1.6403865814208984, "rewards_train/2-2": 1.7958214282989502, "rewards_train/2-w": -1.3275442123413086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.0916428565979, "rewards_train/margins_1": 2.1828763484954834, "rewards_train/margins_2": 3.123365640640259, "step": 38 }, { "epoch": 0.11, "logps_train/policy_1_2": -158.28280639648438, "logps_train/policy_1_l": -137.9793701171875, "logps_train/policy_1_w": -137.6688232421875, "logps_train/policy_2_2": -118.05171203613281, "logps_train/policy_2_w": -183.01718139648438, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.6599212288856506, "rewards_train/1-l": -1.54388427734375, "rewards_train/1-w": 1.743762493133545, "rewards_train/2-2": 1.5555704832077026, "rewards_train/2-w": -0.366195946931839, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.287646770477295, "rewards_train/margins_1": 2.4036837220191956, "rewards_train/margins_2": 1.9217664301395416, "step": 38 }, { "epoch": 0.12, "logps_train/policy_1_2": -181.26461791992188, "logps_train/policy_1_l": -170.11843872070312, "logps_train/policy_1_w": -111.99779510498047, "logps_train/policy_2_2": -129.67132568359375, "logps_train/policy_2_w": -155.7168731689453, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -0.8795861601829529, "rewards_train/1-l": -2.1430938243865967, "rewards_train/1-w": 1.1066174507141113, "rewards_train/2-2": 1.3211488723754883, "rewards_train/2-w": -0.8767659068107605, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.249711275100708, "rewards_train/margins_1": 1.9862036108970642, "rewards_train/margins_2": 2.197914779186249, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -216.843994140625, "logps_train/policy_1_l": -178.36526489257812, "logps_train/policy_1_w": -152.08157348632812, "logps_train/policy_2_2": -160.43814086914062, "logps_train/policy_2_w": -205.64410400390625, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.2823972702026367, "rewards_train/1-l": -1.885697603225708, "rewards_train/1-w": 1.156294822692871, "rewards_train/2-2": 1.7425135374069214, "rewards_train/2-w": -1.6942929029464722, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.041992425918579, "rewards_train/margins_1": 2.438692092895508, "rewards_train/margins_2": 3.4368064403533936, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -168.37017822265625, "logps_train/policy_1_l": -205.21969604492188, "logps_train/policy_1_w": -133.2811279296875, "logps_train/policy_2_2": -118.83570861816406, "logps_train/policy_2_w": -189.11447143554688, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.4780343770980835, "rewards_train/1-l": -2.548495292663574, "rewards_train/1-w": 0.9387813806533813, "rewards_train/2-2": 1.0908427238464355, "rewards_train/2-w": -1.5198453664779663, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.4872766733169556, "rewards_train/margins_1": 2.416815757751465, "rewards_train/margins_2": 2.610688090324402, "step": 39 }, { "epoch": 0.12, "logps_train/policy_1_2": -190.8046417236328, "logps_train/policy_1_l": -117.74546813964844, "logps_train/policy_1_w": -136.42147827148438, "logps_train/policy_2_2": -135.85800170898438, "logps_train/policy_2_w": -189.6466064453125, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.3578085899353027, "rewards_train/1-l": -1.5821161270141602, "rewards_train/1-w": 1.3152011632919312, "rewards_train/2-2": 1.4306068420410156, "rewards_train/2-w": -1.1075307130813599, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8973172903060913, "rewards_train/margins_1": 2.673009753227234, "rewards_train/margins_2": 2.5381375551223755, "step": 39 }, { "epoch": 0.12, "learning_rate": 4.998902091271986e-06, "loss": 1.2997, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -177.12069702148438, "logps_train/policy_1_l": -139.85464477539062, "logps_train/policy_1_w": -128.40786743164062, "logps_train/policy_2_2": -138.21829223632812, "logps_train/policy_2_w": -174.63641357421875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.098008394241333, "rewards_train/1-l": -1.9933751821517944, "rewards_train/1-w": 1.2732768058776855, "rewards_train/2-2": 1.070357322692871, "rewards_train/2-w": -1.1242859363555908, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.26665198802948, "rewards_train/margins_1": 2.3712852001190186, "rewards_train/margins_2": 2.194643259048462, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -142.83575439453125, "logps_train/policy_1_l": -121.92212677001953, "logps_train/policy_1_w": -100.66351318359375, "logps_train/policy_2_2": -100.10145568847656, "logps_train/policy_2_w": -143.8209228515625, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -1.0441237688064575, "rewards_train/1-l": -1.3255654573440552, "rewards_train/1-w": 0.9035705924034119, "rewards_train/2-2": 1.1748154163360596, "rewards_train/2-w": -1.2680286169052124, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.229136049747467, "rewards_train/margins_1": 1.9476943612098694, "rewards_train/margins_2": 2.442844033241272, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -213.12130737304688, "logps_train/policy_1_l": -158.4971160888672, "logps_train/policy_1_w": -127.75901794433594, "logps_train/policy_2_2": -159.26658630371094, "logps_train/policy_2_w": -178.28768920898438, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.9884006977081299, "rewards_train/1-l": -1.378019094467163, "rewards_train/1-w": 1.581520438194275, "rewards_train/2-2": 1.582716703414917, "rewards_train/2-w": -0.9111907482147217, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.959539532661438, "rewards_train/margins_1": 2.569921135902405, "rewards_train/margins_2": 2.4939074516296387, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -156.91552734375, "logps_train/policy_1_l": -152.60507202148438, "logps_train/policy_1_w": -124.44337463378906, "logps_train/policy_2_2": -119.26460266113281, "logps_train/policy_2_w": -183.901611328125, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.6618643999099731, "rewards_train/1-l": -1.5073951482772827, "rewards_train/1-w": 1.2519633769989014, "rewards_train/2-2": 1.0917038917541504, "rewards_train/2-w": -1.3837165832519531, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.759358525276184, "rewards_train/margins_1": 1.9138277769088745, "rewards_train/margins_2": 2.4754204750061035, "step": 40 }, { "epoch": 0.12, "logps_train/policy_1_2": -211.94692993164062, "logps_train/policy_1_l": -194.04893493652344, "logps_train/policy_1_w": -175.51593017578125, "logps_train/policy_2_2": -154.2369384765625, "logps_train/policy_2_w": -237.77735900878906, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.3077802658081055, "rewards_train/1-l": -1.9848837852478027, "rewards_train/1-w": 1.9765331745147705, "rewards_train/2-2": 1.6412484645843506, "rewards_train/2-w": -0.6542981266975403, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9614169597625732, "rewards_train/margins_1": 3.284313440322876, "rewards_train/margins_2": 2.295546591281891, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -156.79342651367188, "logps_train/policy_1_l": -175.281005859375, "logps_train/policy_1_w": -118.60442352294922, "logps_train/policy_2_2": -112.51747131347656, "logps_train/policy_2_w": -173.57911682128906, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.5699667930603027, "rewards_train/1-l": -2.448413610458374, "rewards_train/1-w": 0.9754953384399414, "rewards_train/2-2": 0.6347758173942566, "rewards_train/2-w": -1.3528339862823486, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4239089488983154, "rewards_train/margins_1": 2.545462131500244, "rewards_train/margins_2": 1.9876098036766052, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -230.05731201171875, "logps_train/policy_1_l": -207.6419677734375, "logps_train/policy_1_w": -157.3697967529297, "logps_train/policy_2_2": -183.61505126953125, "logps_train/policy_2_w": -208.0506134033203, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -1.0225287675857544, "rewards_train/1-l": -2.1626343727111816, "rewards_train/1-w": 0.9963210225105286, "rewards_train/2-2": 1.4101747274398804, "rewards_train/2-w": -1.6454914808273315, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.15895539522171, "rewards_train/margins_1": 2.018849790096283, "rewards_train/margins_2": 3.055666208267212, "step": 41 }, { "epoch": 0.12, "logps_train/policy_1_2": -201.18258666992188, "logps_train/policy_1_l": -197.93914794921875, "logps_train/policy_1_w": -135.62954711914062, "logps_train/policy_2_2": -151.64230346679688, "logps_train/policy_2_w": -188.05519104003906, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.0225554704666138, "rewards_train/1-l": -2.0054373741149902, "rewards_train/1-w": 1.3604836463928223, "rewards_train/2-2": 1.6855741739273071, "rewards_train/2-w": -1.576613187789917, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.3659210205078125, "rewards_train/margins_1": 2.383039116859436, "rewards_train/margins_2": 3.262187361717224, "step": 41 }, { "epoch": 0.13, "learning_rate": 4.99804827338393e-06, "loss": 1.454, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -201.96063232421875, "logps_train/policy_1_l": -152.6468048095703, "logps_train/policy_1_w": -139.19003295898438, "logps_train/policy_2_2": -151.4898681640625, "logps_train/policy_2_w": -187.47744750976562, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.4502609372138977, "rewards_train/1-l": -2.0438060760498047, "rewards_train/1-w": 1.5081452131271362, "rewards_train/2-2": 1.9610886573791504, "rewards_train/2-w": -1.0981355905532837, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.551951289176941, "rewards_train/margins_1": 1.958406150341034, "rewards_train/margins_2": 3.059224247932434, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -171.51446533203125, "logps_train/policy_1_l": -172.0654754638672, "logps_train/policy_1_w": -139.85128784179688, "logps_train/policy_2_2": -130.24533081054688, "logps_train/policy_2_w": -191.55386352539062, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.8721487522125244, "rewards_train/1-l": -2.020005226135254, "rewards_train/1-w": 1.3506629467010498, "rewards_train/2-2": 1.5504664182662964, "rewards_train/2-w": -0.8761863112449646, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3706681728363037, "rewards_train/margins_1": 2.222811698913574, "rewards_train/margins_2": 2.426652729511261, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -191.04315185546875, "logps_train/policy_1_l": -259.31878662109375, "logps_train/policy_1_w": -157.9792022705078, "logps_train/policy_2_2": -142.74810791015625, "logps_train/policy_2_w": -230.1971435546875, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.1183788776397705, "rewards_train/1-l": -2.9232864379882812, "rewards_train/1-w": 1.6034477949142456, "rewards_train/2-2": 1.3062429428100586, "rewards_train/2-w": -1.6017444133758545, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.526734232902527, "rewards_train/margins_1": 2.721826672554016, "rewards_train/margins_2": 2.907987356185913, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -178.13247680664062, "logps_train/policy_1_l": -156.1944580078125, "logps_train/policy_1_w": -113.7523193359375, "logps_train/policy_2_2": -130.87478637695312, "logps_train/policy_2_w": -169.30616760253906, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -0.6964508891105652, "rewards_train/1-l": -1.7204701900482178, "rewards_train/1-w": 1.68975830078125, "rewards_train/2-2": 1.7834205627441406, "rewards_train/2-w": -0.8138200640678406, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.4102284908294678, "rewards_train/margins_1": 2.386209189891815, "rewards_train/margins_2": 2.597240626811981, "step": 42 }, { "epoch": 0.13, "logps_train/policy_1_2": -214.59841918945312, "logps_train/policy_1_l": -211.36538696289062, "logps_train/policy_1_w": -151.55340576171875, "logps_train/policy_2_2": -159.5579833984375, "logps_train/policy_2_w": -203.138671875, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.9377717971801758, "rewards_train/1-l": -2.1614418029785156, "rewards_train/1-w": 1.0409479141235352, "rewards_train/2-2": 1.2338511943817139, "rewards_train/2-w": -1.5874992609024048, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.202389717102051, "rewards_train/margins_1": 2.978719711303711, "rewards_train/margins_2": 2.8213504552841187, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -162.63522338867188, "logps_train/policy_1_l": -195.246826171875, "logps_train/policy_1_w": -136.82327270507812, "logps_train/policy_2_2": -111.89631652832031, "logps_train/policy_2_w": -203.08026123046875, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.4572720527648926, "rewards_train/1-l": -2.2249274253845215, "rewards_train/1-w": 1.5899381637573242, "rewards_train/2-2": 1.2144699096679688, "rewards_train/2-w": -1.4939627647399902, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8148655891418457, "rewards_train/margins_1": 3.047210216522217, "rewards_train/margins_2": 2.708432674407959, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -226.70733642578125, "logps_train/policy_1_l": -204.802978515625, "logps_train/policy_1_w": -208.47535705566406, "logps_train/policy_2_2": -166.23410034179688, "logps_train/policy_2_w": -281.2499694824219, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -227.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": -1.4066709280014038, "rewards_train/1-l": -1.6636474132537842, "rewards_train/1-w": 1.8419164419174194, "rewards_train/2-2": 1.8156516551971436, "rewards_train/2-w": -1.7468717098236084, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5055638551712036, "rewards_train/margins_1": 3.2485873699188232, "rewards_train/margins_2": 3.562523365020752, "step": 43 }, { "epoch": 0.13, "logps_train/policy_1_2": -151.78192138671875, "logps_train/policy_1_l": -138.6370849609375, "logps_train/policy_1_w": -110.50706481933594, "logps_train/policy_2_2": -117.98634338378906, "logps_train/policy_2_w": -149.335205078125, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -0.5302420854568481, "rewards_train/1-l": -1.231372356414795, "rewards_train/1-w": 1.1035897731781006, "rewards_train/2-2": 1.099412202835083, "rewards_train/2-w": -0.6561771631240845, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.3349621295928955, "rewards_train/margins_1": 1.6338318586349487, "rewards_train/margins_2": 1.7555893659591675, "step": 43 }, { "epoch": 0.13, "learning_rate": 4.996950650388179e-06, "loss": 1.3548, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -195.38577270507812, "logps_train/policy_1_l": -128.3151397705078, "logps_train/policy_1_w": -130.78646850585938, "logps_train/policy_2_2": -149.00515747070312, "logps_train/policy_2_w": -174.93673706054688, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -0.45635148882865906, "rewards_train/1-l": -1.2834670543670654, "rewards_train/1-w": 1.054103970527649, "rewards_train/2-2": 1.8332737684249878, "rewards_train/2-w": -0.8055874109268188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.3375710248947144, "rewards_train/margins_1": 1.510455459356308, "rewards_train/margins_2": 2.6388611793518066, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -188.27349853515625, "logps_train/policy_1_l": -157.06561279296875, "logps_train/policy_1_w": -112.52903747558594, "logps_train/policy_2_2": -129.51983642578125, "logps_train/policy_2_w": -166.34326171875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.9916068911552429, "rewards_train/1-l": -1.4791200160980225, "rewards_train/1-w": 1.08635413646698, "rewards_train/2-2": 2.042548656463623, "rewards_train/2-w": -1.4081536531448364, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.5654741525650024, "rewards_train/margins_1": 2.077961027622223, "rewards_train/margins_2": 3.4507023096084595, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -184.48928833007812, "logps_train/policy_1_l": -191.62957763671875, "logps_train/policy_1_w": -113.75111389160156, "logps_train/policy_2_2": -156.40615844726562, "logps_train/policy_2_w": -140.55491638183594, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 0.17528913915157318, "rewards_train/1-l": -2.2657876014709473, "rewards_train/1-w": 0.6357289552688599, "rewards_train/2-2": 1.5671968460083008, "rewards_train/2-w": -0.7547107934951782, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.901516556739807, "rewards_train/margins_1": 0.4604398161172867, "rewards_train/margins_2": 2.321907639503479, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -86.18600463867188, "logps_train/policy_1_l": -69.07441711425781, "logps_train/policy_1_w": -50.789222717285156, "logps_train/policy_2_2": -68.6605224609375, "logps_train/policy_2_w": -69.4461441040039, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -64.0, "logps_train/ref_1_w": -56.0, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -67.0, "rewards_train/1-2": -0.004538334906101227, "rewards_train/1-l": -0.5017774701118469, "rewards_train/1-w": 0.5320149660110474, "rewards_train/2-2": 0.7419561147689819, "rewards_train/2-w": -0.2512552738189697, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.0337924361228943, "rewards_train/margins_1": 0.5365533009171486, "rewards_train/margins_2": 0.9932113885879517, "step": 44 }, { "epoch": 0.13, "logps_train/policy_1_2": -196.5608673095703, "logps_train/policy_1_l": -210.84475708007812, "logps_train/policy_1_w": -153.18539428710938, "logps_train/policy_2_2": -151.25418090820312, "logps_train/policy_2_w": -204.88424682617188, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -0.7244464159011841, "rewards_train/1-l": -2.48401141166687, "rewards_train/1-w": 1.7416894435882568, "rewards_train/2-2": 1.75391685962677, "rewards_train/2-w": -0.9520974159240723, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.225700855255127, "rewards_train/margins_1": 2.466135859489441, "rewards_train/margins_2": 2.7060142755508423, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -154.69180297851562, "logps_train/policy_1_l": -187.0970001220703, "logps_train/policy_1_w": -122.08460998535156, "logps_train/policy_2_2": -120.06241607666016, "logps_train/policy_2_w": -164.913330078125, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.15511710941791534, "rewards_train/1-l": -2.084456443786621, "rewards_train/1-w": 1.6325546503067017, "rewards_train/2-2": 1.4310626983642578, "rewards_train/2-w": -0.35061028599739075, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.7170110940933228, "rewards_train/margins_1": 1.787671759724617, "rewards_train/margins_2": 1.7816729843616486, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -180.55728149414062, "logps_train/policy_1_l": -176.64190673828125, "logps_train/policy_1_w": -144.66372680664062, "logps_train/policy_2_2": -142.92332458496094, "logps_train/policy_2_w": -185.5936279296875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.28893160820007324, "rewards_train/1-l": -1.46653413772583, "rewards_train/1-w": 1.546323299407959, "rewards_train/2-2": 1.6568864583969116, "rewards_train/2-w": -0.21580827236175537, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.012857437133789, "rewards_train/margins_1": 1.8352549076080322, "rewards_train/margins_2": 1.872694730758667, "step": 45 }, { "epoch": 0.13, "logps_train/policy_1_2": -169.83294677734375, "logps_train/policy_1_l": -193.20091247558594, "logps_train/policy_1_w": -142.8164825439453, "logps_train/policy_2_2": -120.12791442871094, "logps_train/policy_2_w": -197.17840576171875, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.809660792350769, "rewards_train/1-l": -2.205808162689209, "rewards_train/1-w": 1.9226487874984741, "rewards_train/2-2": 1.5558850765228271, "rewards_train/2-w": -0.7459641695022583, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.128456950187683, "rewards_train/margins_1": 2.732309579849243, "rewards_train/margins_2": 2.3018492460250854, "step": 45 }, { "epoch": 0.14, "learning_rate": 4.9956093294108046e-06, "loss": 1.4097, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -151.66461181640625, "logps_train/policy_1_l": -161.79547119140625, "logps_train/policy_1_w": -109.31533813476562, "logps_train/policy_2_2": -102.94242858886719, "logps_train/policy_2_w": -167.02203369140625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -0.9555235505104065, "rewards_train/1-l": -1.7367000579833984, "rewards_train/1-w": 1.5833098888397217, "rewards_train/2-2": 1.4873979091644287, "rewards_train/2-w": -1.0654851198196411, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.32000994682312, "rewards_train/margins_1": 2.538833439350128, "rewards_train/margins_2": 2.55288302898407, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -280.2986755371094, "logps_train/policy_1_l": -175.51171875, "logps_train/policy_1_w": -142.62081909179688, "logps_train/policy_2_2": -192.357177734375, "logps_train/policy_2_w": -205.44476318359375, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -2.243929862976074, "rewards_train/1-l": -1.6017576456069946, "rewards_train/1-w": 1.7227801084518433, "rewards_train/2-2": 2.285571575164795, "rewards_train/2-w": -1.132563591003418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.324537754058838, "rewards_train/margins_1": 3.9667099714279175, "rewards_train/margins_2": 3.418135166168213, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -120.98715209960938, "logps_train/policy_1_l": -78.35799407958984, "logps_train/policy_1_w": -112.272705078125, "logps_train/policy_2_2": -93.8502426147461, "logps_train/policy_2_w": -143.6964569091797, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -0.48543375730514526, "rewards_train/1-l": -0.7397665977478027, "rewards_train/1-w": 1.40692138671875, "rewards_train/2-2": 0.7294285893440247, "rewards_train/2-w": -0.11449463665485382, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.1466879844665527, "rewards_train/margins_1": 1.8923551440238953, "rewards_train/margins_2": 0.8439232259988785, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -158.5092315673828, "logps_train/policy_1_l": -210.1247100830078, "logps_train/policy_1_w": -142.3619384765625, "logps_train/policy_2_2": -119.61527252197266, "logps_train/policy_2_w": -195.844482421875, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.7587358355522156, "rewards_train/1-l": -1.9958690404891968, "rewards_train/1-w": 1.4559931755065918, "rewards_train/2-2": 1.07948899269104, "rewards_train/2-w": -1.021946668624878, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.4518622159957886, "rewards_train/margins_1": 2.2147290110588074, "rewards_train/margins_2": 2.101435661315918, "step": 46 }, { "epoch": 0.14, "logps_train/policy_1_2": -173.66885375976562, "logps_train/policy_1_l": -108.49879455566406, "logps_train/policy_1_w": -92.30168914794922, "logps_train/policy_2_2": -126.34564208984375, "logps_train/policy_2_w": -145.53826904296875, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -1.6616122722625732, "rewards_train/1-l": -1.1535296440124512, "rewards_train/1-w": 0.9340887069702148, "rewards_train/2-2": 1.038092017173767, "rewards_train/2-w": -1.670623540878296, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.087618350982666, "rewards_train/margins_1": 2.595700979232788, "rewards_train/margins_2": 2.708715558052063, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -123.1739501953125, "logps_train/policy_1_l": -145.61798095703125, "logps_train/policy_1_w": -80.69184112548828, "logps_train/policy_2_2": -93.03703308105469, "logps_train/policy_2_w": -102.84024810791016, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -101.5, "rewards_train/1-2": -0.15802057087421417, "rewards_train/1-l": -1.2175604104995728, "rewards_train/1-w": 0.7997610569000244, "rewards_train/2-2": 1.3771562576293945, "rewards_train/2-w": -0.13246259093284607, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.017321467399597, "rewards_train/margins_1": 0.9577816277742386, "rewards_train/margins_2": 1.5096188485622406, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -170.61166381835938, "logps_train/policy_1_l": -189.06689453125, "logps_train/policy_1_w": -179.36679077148438, "logps_train/policy_2_2": -139.48849487304688, "logps_train/policy_2_w": -224.00732421875, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.08147956430912018, "rewards_train/1-l": -1.669385552406311, "rewards_train/1-w": 1.825430154800415, "rewards_train/2-2": 1.5113060474395752, "rewards_train/2-w": -0.2687023878097534, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.494815707206726, "rewards_train/margins_1": 1.9069097191095352, "rewards_train/margins_2": 1.7800084352493286, "step": 47 }, { "epoch": 0.14, "logps_train/policy_1_2": -197.99114990234375, "logps_train/policy_1_l": -176.1732177734375, "logps_train/policy_1_w": -157.89920043945312, "logps_train/policy_2_2": -156.2769775390625, "logps_train/policy_2_w": -207.00979614257812, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -0.24520842730998993, "rewards_train/1-l": -1.8587290048599243, "rewards_train/1-w": 1.8045144081115723, "rewards_train/2-2": 1.928160548210144, "rewards_train/2-w": -0.7276414632797241, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6632434129714966, "rewards_train/margins_1": 2.049722835421562, "rewards_train/margins_2": 2.655802011489868, "step": 47 }, { "epoch": 0.14, "learning_rate": 4.994024441362366e-06, "loss": 1.402, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -162.3292694091797, "logps_train/policy_1_l": -143.03343200683594, "logps_train/policy_1_w": -113.10498046875, "logps_train/policy_2_2": -117.38894653320312, "logps_train/policy_2_w": -155.8737030029297, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -1.2881994247436523, "rewards_train/1-l": -0.91181480884552, "rewards_train/1-w": 1.2037591934204102, "rewards_train/2-2": 1.1659880876541138, "rewards_train/2-w": -1.3713550567626953, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.11557400226593, "rewards_train/margins_1": 2.4919586181640625, "rewards_train/margins_2": 2.537343144416809, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -116.44711303710938, "logps_train/policy_1_l": -74.38819122314453, "logps_train/policy_1_w": -94.1257095336914, "logps_train/policy_2_2": -85.50056457519531, "logps_train/policy_2_w": -126.85127258300781, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": -0.42674311995506287, "rewards_train/1-l": -0.666675329208374, "rewards_train/1-w": 1.2045187950134277, "rewards_train/2-2": 1.0456465482711792, "rewards_train/2-w": -0.28004851937294006, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.8711941242218018, "rewards_train/margins_1": 1.6312619149684906, "rewards_train/margins_2": 1.3256950676441193, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -215.7302703857422, "logps_train/policy_1_l": -199.29054260253906, "logps_train/policy_1_w": -132.10458374023438, "logps_train/policy_2_2": -166.09275817871094, "logps_train/policy_2_w": -174.10629272460938, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.9487097263336182, "rewards_train/1-l": -2.232569694519043, "rewards_train/1-w": 1.5558499097824097, "rewards_train/2-2": 1.7650903463363647, "rewards_train/2-w": -0.47845134139060974, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.7884196043014526, "rewards_train/margins_1": 2.504559636116028, "rewards_train/margins_2": 2.2435416877269745, "step": 48 }, { "epoch": 0.14, "logps_train/policy_1_2": -142.53964233398438, "logps_train/policy_1_l": -135.2275848388672, "logps_train/policy_1_w": -104.54063415527344, "logps_train/policy_2_2": -111.03192138671875, "logps_train/policy_2_w": -134.44369506835938, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.24290966987609863, "rewards_train/1-l": -1.160649061203003, "rewards_train/1-w": 1.2447645664215088, "rewards_train/2-2": 1.7370418310165405, "rewards_train/2-w": -0.09515175223350525, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4054136276245117, "rewards_train/margins_1": 1.0018548965454102, "rewards_train/margins_2": 1.8321935832500458, "step": 48 }, { "epoch": 0.15, "logps_train/policy_1_2": -165.40826416015625, "logps_train/policy_1_l": -128.0726318359375, "logps_train/policy_1_w": -100.60867309570312, "logps_train/policy_2_2": -121.11677551269531, "logps_train/policy_2_w": -133.479248046875, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -0.5900456309318542, "rewards_train/1-l": -0.7787477970123291, "rewards_train/1-w": 0.9910858273506165, "rewards_train/2-2": 1.4570728540420532, "rewards_train/2-w": -0.6272218823432922, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.7698336243629456, "rewards_train/margins_1": 1.5811314582824707, "rewards_train/margins_2": 2.0842947363853455, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -176.60028076171875, "logps_train/policy_1_l": -172.70794677734375, "logps_train/policy_1_w": -126.52975463867188, "logps_train/policy_2_2": -141.79946899414062, "logps_train/policy_2_w": -166.19212341308594, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.6326842308044434, "rewards_train/1-l": -1.6197444200515747, "rewards_train/1-w": 1.3591344356536865, "rewards_train/2-2": 1.048567533493042, "rewards_train/2-w": -0.6633526086807251, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9788788557052612, "rewards_train/margins_1": 1.9918186664581299, "rewards_train/margins_2": 1.711920142173767, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -113.93700408935547, "logps_train/policy_1_l": -112.77034759521484, "logps_train/policy_1_w": -88.32579040527344, "logps_train/policy_2_2": -92.75257873535156, "logps_train/policy_2_w": -113.53887939453125, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": -0.17622025310993195, "rewards_train/1-l": -0.6380214691162109, "rewards_train/1-w": 0.9609752893447876, "rewards_train/2-2": 0.8864609003067017, "rewards_train/2-w": -0.09627120196819305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.5989967584609985, "rewards_train/margins_1": 1.1371955424547195, "rewards_train/margins_2": 0.9827321022748947, "step": 49 }, { "epoch": 0.15, "logps_train/policy_1_2": -234.43011474609375, "logps_train/policy_1_l": -199.90042114257812, "logps_train/policy_1_w": -157.9779510498047, "logps_train/policy_2_2": -179.16697692871094, "logps_train/policy_2_w": -223.27035522460938, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -1.3828539848327637, "rewards_train/1-l": -0.9857452511787415, "rewards_train/1-w": 1.345369815826416, "rewards_train/2-2": 1.6192402839660645, "rewards_train/2-w": -1.568833827972412, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3311150670051575, "rewards_train/margins_1": 2.7282238006591797, "rewards_train/margins_2": 3.1880741119384766, "step": 49 }, { "epoch": 0.15, "learning_rate": 4.9921961409251465e-06, "loss": 1.4775, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -261.51556396484375, "logps_train/policy_1_l": -164.2589874267578, "logps_train/policy_1_w": -149.79092407226562, "logps_train/policy_2_2": -204.29281616210938, "logps_train/policy_2_w": -207.33267211914062, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -0.6273361444473267, "rewards_train/1-l": -1.5803056955337524, "rewards_train/1-w": 1.9612760543823242, "rewards_train/2-2": 2.2554850578308105, "rewards_train/2-w": -0.96092689037323, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5415817499160767, "rewards_train/margins_1": 2.588612198829651, "rewards_train/margins_2": 3.2164119482040405, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -155.7590789794922, "logps_train/policy_1_l": -133.85488891601562, "logps_train/policy_1_w": -98.67342376708984, "logps_train/policy_2_2": -120.10052490234375, "logps_train/policy_2_w": -134.73007202148438, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -0.6438766121864319, "rewards_train/1-l": -0.9516996145248413, "rewards_train/1-w": 0.9951578378677368, "rewards_train/2-2": 1.1282285451889038, "rewards_train/2-w": -0.5347270965576172, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9468574523925781, "rewards_train/margins_1": 1.6390344500541687, "rewards_train/margins_2": 1.662955641746521, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -119.75987243652344, "logps_train/policy_1_l": -99.80315399169922, "logps_train/policy_1_w": -102.66109466552734, "logps_train/policy_2_2": -88.31251525878906, "logps_train/policy_2_w": -133.95669555664062, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -0.4291123151779175, "rewards_train/1-l": -0.8968072533607483, "rewards_train/1-w": 1.5108436346054077, "rewards_train/2-2": 1.0195295810699463, "rewards_train/2-w": -0.28785574436187744, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.407650887966156, "rewards_train/margins_1": 1.9399559497833252, "rewards_train/margins_2": 1.3073853254318237, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -195.9310302734375, "logps_train/policy_1_l": -204.65286254882812, "logps_train/policy_1_w": -169.42852783203125, "logps_train/policy_2_2": -152.07989501953125, "logps_train/policy_2_w": -236.1090087890625, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -0.6110705137252808, "rewards_train/1-l": -1.757265329360962, "rewards_train/1-w": 2.231769323348999, "rewards_train/2-2": 1.6517773866653442, "rewards_train/2-w": -1.0634407997131348, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.989034652709961, "rewards_train/margins_1": 2.84283983707428, "rewards_train/margins_2": 2.715218186378479, "step": 50 }, { "epoch": 0.15, "logps_train/policy_1_2": -214.6526641845703, "logps_train/policy_1_l": -206.31625366210938, "logps_train/policy_1_w": -185.01718139648438, "logps_train/policy_2_2": -166.61309814453125, "logps_train/policy_2_w": -234.0043182373047, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -0.0371403694152832, "rewards_train/1-l": -1.5954922437667847, "rewards_train/1-w": 1.7285561561584473, "rewards_train/2-2": 2.4324395656585693, "rewards_train/2-w": -0.49886900186538696, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.324048399925232, "rewards_train/margins_1": 1.7656965255737305, "rewards_train/margins_2": 2.9313085675239563, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -180.95193481445312, "logps_train/policy_1_l": -144.63697814941406, "logps_train/policy_1_w": -128.42755126953125, "logps_train/policy_2_2": -135.06488037109375, "logps_train/policy_2_w": -174.176513671875, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -0.6518344879150391, "rewards_train/1-l": -1.6587185859680176, "rewards_train/1-w": 1.562859058380127, "rewards_train/2-2": 2.0524966716766357, "rewards_train/2-w": -0.3339601755142212, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2215776443481445, "rewards_train/margins_1": 2.214693546295166, "rewards_train/margins_2": 2.386456847190857, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -183.09458923339844, "logps_train/policy_1_l": -209.22361755371094, "logps_train/policy_1_w": -172.13427734375, "logps_train/policy_2_2": -129.78256225585938, "logps_train/policy_2_w": -234.19778442382812, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -0.6875836849212646, "rewards_train/1-l": -1.2622053623199463, "rewards_train/1-w": 1.4678210020065308, "rewards_train/2-2": 1.8342437744140625, "rewards_train/2-w": -1.3213417530059814, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.730026364326477, "rewards_train/margins_1": 2.1554046869277954, "rewards_train/margins_2": 3.155585527420044, "step": 51 }, { "epoch": 0.15, "logps_train/policy_1_2": -98.85273742675781, "logps_train/policy_1_l": -100.28812408447266, "logps_train/policy_1_w": -109.69102478027344, "logps_train/policy_2_2": -72.50494384765625, "logps_train/policy_2_w": -138.22116088867188, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -81.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.36759793758392334, "rewards_train/1-l": -0.5947425961494446, "rewards_train/1-w": 1.676405668258667, "rewards_train/2-2": 0.8222595453262329, "rewards_train/2-w": 0.22397711873054504, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.2711482644081116, "rewards_train/margins_1": 2.0440036058425903, "rewards_train/margins_2": 0.5982824265956879, "step": 51 }, { "epoch": 0.16, "learning_rate": 4.9901246065380425e-06, "loss": 1.2838, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -175.84417724609375, "logps_train/policy_1_l": -174.29867553710938, "logps_train/policy_1_w": -111.38949584960938, "logps_train/policy_2_2": -129.58331298828125, "logps_train/policy_2_w": -152.207763671875, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -0.5129333734512329, "rewards_train/1-l": -1.8392419815063477, "rewards_train/1-w": 1.6239409446716309, "rewards_train/2-2": 2.128779172897339, "rewards_train/2-w": -0.3426502048969269, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4631829261779785, "rewards_train/margins_1": 2.1368743181228638, "rewards_train/margins_2": 2.4714293777942657, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -223.01547241210938, "logps_train/policy_1_l": -244.46470642089844, "logps_train/policy_1_w": -147.49871826171875, "logps_train/policy_2_2": -178.22952270507812, "logps_train/policy_2_w": -198.99978637695312, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.7862161993980408, "rewards_train/1-l": -2.1429550647735596, "rewards_train/1-w": 1.9584300518035889, "rewards_train/2-2": 1.4846656322479248, "rewards_train/2-w": -0.8579864501953125, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.101385116577148, "rewards_train/margins_1": 2.7446462512016296, "rewards_train/margins_2": 2.3426520824432373, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -244.22019958496094, "logps_train/policy_1_l": -210.6483154296875, "logps_train/policy_1_w": -141.55526733398438, "logps_train/policy_2_2": -178.40240478515625, "logps_train/policy_2_w": -207.5647430419922, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.2063950300216675, "rewards_train/1-l": -1.5931518077850342, "rewards_train/1-w": 1.557949185371399, "rewards_train/2-2": 2.1156182289123535, "rewards_train/2-w": -1.4937795400619507, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.151100993156433, "rewards_train/margins_1": 2.7643442153930664, "rewards_train/margins_2": 3.609397768974304, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -181.25436401367188, "logps_train/policy_1_l": -113.82414245605469, "logps_train/policy_1_w": -143.1304931640625, "logps_train/policy_2_2": -132.17935180664062, "logps_train/policy_2_w": -186.8953857421875, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.6793410181999207, "rewards_train/1-l": -1.4601969718933105, "rewards_train/1-w": 1.9051142930984497, "rewards_train/2-2": 1.9687845706939697, "rewards_train/2-w": -0.3991090953350067, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.3653112649917603, "rewards_train/margins_1": 2.5844553112983704, "rewards_train/margins_2": 2.3678936660289764, "step": 52 }, { "epoch": 0.16, "logps_train/policy_1_2": -254.60275268554688, "logps_train/policy_1_l": -229.16583251953125, "logps_train/policy_1_w": -190.6562042236328, "logps_train/policy_2_2": -197.69342041015625, "logps_train/policy_2_w": -271.3583984375, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -213.0, "logps_train/ref_2_w": -253.0, "rewards_train/1-2": -1.4899616241455078, "rewards_train/1-l": -1.9943664073944092, "rewards_train/1-w": 2.3492226600646973, "rewards_train/2-2": 1.5275335311889648, "rewards_train/2-w": -1.788377046585083, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.3435890674591064, "rewards_train/margins_1": 3.839184284210205, "rewards_train/margins_2": 3.315910577774048, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -168.5994415283203, "logps_train/policy_1_l": -180.67202758789062, "logps_train/policy_1_w": -134.20994567871094, "logps_train/policy_2_2": -128.78857421875, "logps_train/policy_2_w": -181.59573364257812, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.1923653781414032, "rewards_train/1-l": -1.5576322078704834, "rewards_train/1-w": 2.015235424041748, "rewards_train/2-2": 1.680713415145874, "rewards_train/2-w": -0.6035664677619934, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5728676319122314, "rewards_train/margins_1": 2.2076008021831512, "rewards_train/margins_2": 2.2842798829078674, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -146.26025390625, "logps_train/policy_1_l": -160.4655303955078, "logps_train/policy_1_w": -131.2767333984375, "logps_train/policy_2_2": -104.4939956665039, "logps_train/policy_2_w": -179.02508544921875, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.46118223667144775, "rewards_train/1-l": -1.4094438552856445, "rewards_train/1-w": 2.0996694564819336, "rewards_train/2-2": 1.6838033199310303, "rewards_train/2-w": -0.5732111930847168, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.509113311767578, "rewards_train/margins_1": 2.5608516931533813, "rewards_train/margins_2": 2.257014513015747, "step": 53 }, { "epoch": 0.16, "logps_train/policy_1_2": -114.15900421142578, "logps_train/policy_1_l": -75.79597473144531, "logps_train/policy_1_w": -86.6729736328125, "logps_train/policy_2_2": -83.6474609375, "logps_train/policy_2_w": -113.50809478759766, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": -0.4924631118774414, "rewards_train/1-l": -0.5871654748916626, "rewards_train/1-w": 1.0131714344024658, "rewards_train/2-2": 0.9030264616012573, "rewards_train/2-w": -0.11096608638763428, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.6003369092941284, "rewards_train/margins_1": 1.5056345462799072, "rewards_train/margins_2": 1.0139925479888916, "step": 53 }, { "epoch": 0.16, "learning_rate": 4.987810040379161e-06, "loss": 1.2147, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -207.39183044433594, "logps_train/policy_1_l": -206.8221893310547, "logps_train/policy_1_w": -149.48782348632812, "logps_train/policy_2_2": -157.55877685546875, "logps_train/policy_2_w": -194.116455078125, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.3876199722290039, "rewards_train/1-l": -1.6984789371490479, "rewards_train/1-w": 1.4152814149856567, "rewards_train/2-2": 2.137092351913452, "rewards_train/2-w": -0.8413340449333191, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1137603521347046, "rewards_train/margins_1": 1.8029013872146606, "rewards_train/margins_2": 2.9784263968467712, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -154.98928833007812, "logps_train/policy_1_l": -137.46002197265625, "logps_train/policy_1_w": -109.4779052734375, "logps_train/policy_2_2": -112.47721862792969, "logps_train/policy_2_w": -150.6396484375, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.8021763563156128, "rewards_train/1-l": -1.3424861431121826, "rewards_train/1-w": 1.5407829284667969, "rewards_train/2-2": 1.5657545328140259, "rewards_train/2-w": -0.2764650583267212, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.8832690715789795, "rewards_train/margins_1": 2.3429592847824097, "rewards_train/margins_2": 1.842219591140747, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -133.69851684570312, "logps_train/policy_1_l": -120.974365234375, "logps_train/policy_1_w": -92.46407318115234, "logps_train/policy_2_2": -104.36334228515625, "logps_train/policy_2_w": -122.72551727294922, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": -0.2776632606983185, "rewards_train/1-l": -1.3082759380340576, "rewards_train/1-w": 1.161405324935913, "rewards_train/2-2": 0.9828066229820251, "rewards_train/2-w": -0.17509031295776367, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.4696812629699707, "rewards_train/margins_1": 1.4390685856342316, "rewards_train/margins_2": 1.1578969359397888, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -154.55770874023438, "logps_train/policy_1_l": -155.955810546875, "logps_train/policy_1_w": -119.71995544433594, "logps_train/policy_2_2": -121.77608489990234, "logps_train/policy_2_w": -165.35958862304688, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.16358238458633423, "rewards_train/1-l": -1.1078864336013794, "rewards_train/1-w": 1.5317156314849854, "rewards_train/2-2": 1.4368444681167603, "rewards_train/2-w": -0.6023643016815186, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.6396020650863647, "rewards_train/margins_1": 1.6952980160713196, "rewards_train/margins_2": 2.039208769798279, "step": 54 }, { "epoch": 0.16, "logps_train/policy_1_2": -182.0533447265625, "logps_train/policy_1_l": -187.70635986328125, "logps_train/policy_1_w": -144.321533203125, "logps_train/policy_2_2": -126.81192779541016, "logps_train/policy_2_w": -205.99636840820312, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.1459591388702393, "rewards_train/1-l": -1.2137988805770874, "rewards_train/1-w": 1.9783940315246582, "rewards_train/2-2": 1.596932291984558, "rewards_train/2-w": -1.026395320892334, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.1921929121017456, "rewards_train/margins_1": 3.1243531703948975, "rewards_train/margins_2": 2.623327612876892, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -178.5445556640625, "logps_train/policy_1_l": -156.19638061523438, "logps_train/policy_1_w": -146.04342651367188, "logps_train/policy_2_2": -124.65614318847656, "logps_train/policy_2_w": -201.30108642578125, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.621057152748108, "rewards_train/1-l": -1.1874098777770996, "rewards_train/1-w": 1.8030784130096436, "rewards_train/2-2": 1.1607537269592285, "rewards_train/2-w": -1.0238593816757202, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.990488290786743, "rewards_train/margins_1": 3.4241355657577515, "rewards_train/margins_2": 2.1846131086349487, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -231.41180419921875, "logps_train/policy_1_l": -181.11083984375, "logps_train/policy_1_w": -151.6715545654297, "logps_train/policy_2_2": -166.4213104248047, "logps_train/policy_2_w": -203.14459228515625, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.1306345462799072, "rewards_train/1-l": -1.4940924644470215, "rewards_train/1-w": 2.028743267059326, "rewards_train/2-2": 2.2551345825195312, "rewards_train/2-w": -0.7132877111434937, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.5228357315063477, "rewards_train/margins_1": 3.1593778133392334, "rewards_train/margins_2": 2.968422293663025, "step": 55 }, { "epoch": 0.16, "logps_train/policy_1_2": -179.82461547851562, "logps_train/policy_1_l": -140.7049560546875, "logps_train/policy_1_w": -133.77719116210938, "logps_train/policy_2_2": -131.91494750976562, "logps_train/policy_2_w": -185.37924194335938, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.0746484994888306, "rewards_train/1-l": -1.0355346202850342, "rewards_train/1-w": 1.5453271865844727, "rewards_train/2-2": 1.6151461601257324, "rewards_train/2-w": -1.1957359313964844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.580861806869507, "rewards_train/margins_1": 2.6199756860733032, "rewards_train/margins_2": 2.810882091522217, "step": 55 }, { "epoch": 0.17, "learning_rate": 4.985252668346077e-06, "loss": 1.237, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -150.80967712402344, "logps_train/policy_1_l": -157.806884765625, "logps_train/policy_1_w": -156.3776092529297, "logps_train/policy_2_2": -117.21499633789062, "logps_train/policy_2_w": -199.60943603515625, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -0.4929308295249939, "rewards_train/1-l": -0.8806400299072266, "rewards_train/1-w": 1.8264963626861572, "rewards_train/2-2": 1.2556977272033691, "rewards_train/2-w": -0.4812553822994232, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.707136392593384, "rewards_train/margins_1": 2.319427192211151, "rewards_train/margins_2": 1.7369531095027924, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -168.42691040039062, "logps_train/policy_1_l": -188.09054565429688, "logps_train/policy_1_w": -166.81570434570312, "logps_train/policy_2_2": -133.33033752441406, "logps_train/policy_2_w": -211.6710205078125, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -0.7122218608856201, "rewards_train/1-l": -1.3448926210403442, "rewards_train/1-w": 2.0080039501190186, "rewards_train/2-2": 1.0193099975585938, "rewards_train/2-w": -0.38204413652420044, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.352896571159363, "rewards_train/margins_1": 2.7202258110046387, "rewards_train/margins_2": 1.4013541340827942, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -186.4659881591797, "logps_train/policy_1_l": -173.35289001464844, "logps_train/policy_1_w": -114.81977081298828, "logps_train/policy_2_2": -130.67068481445312, "logps_train/policy_2_w": -166.0166778564453, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -1.1171061992645264, "rewards_train/1-l": -1.6434911489486694, "rewards_train/1-w": 1.2158749103546143, "rewards_train/2-2": 1.6973835229873657, "rewards_train/2-w": -1.1391675472259521, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8593660593032837, "rewards_train/margins_1": 2.3329811096191406, "rewards_train/margins_2": 2.836551070213318, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -162.46536254882812, "logps_train/policy_1_l": -183.80966186523438, "logps_train/policy_1_w": -132.76577758789062, "logps_train/policy_2_2": -115.33319854736328, "logps_train/policy_2_w": -170.14892578125, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.2582550048828125, "rewards_train/1-l": -2.240926742553711, "rewards_train/1-w": 1.4441249370574951, "rewards_train/2-2": 1.2139458656311035, "rewards_train/2-w": -0.4234861731529236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.685051679611206, "rewards_train/margins_1": 2.7023799419403076, "rewards_train/margins_2": 1.637432038784027, "step": 56 }, { "epoch": 0.17, "logps_train/policy_1_2": -178.66990661621094, "logps_train/policy_1_l": -211.17059326171875, "logps_train/policy_1_w": -136.64324951171875, "logps_train/policy_2_2": -128.85873413085938, "logps_train/policy_2_w": -198.77001953125, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -0.4941388964653015, "rewards_train/1-l": -2.1575870513916016, "rewards_train/1-w": 1.912921667098999, "rewards_train/2-2": 2.0225257873535156, "rewards_train/2-w": -1.2535649538040161, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.070508718490601, "rewards_train/margins_1": 2.4070605635643005, "rewards_train/margins_2": 3.2760907411575317, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -123.593017578125, "logps_train/policy_1_l": -96.99530792236328, "logps_train/policy_1_w": -76.0704574584961, "logps_train/policy_2_2": -86.53984069824219, "logps_train/policy_2_w": -116.30863952636719, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -96.5, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": -0.5084227323532104, "rewards_train/1-l": -0.9374215602874756, "rewards_train/1-w": 1.185338020324707, "rewards_train/2-2": 1.0143760442733765, "rewards_train/2-w": -0.7477583289146423, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.1227595806121826, "rewards_train/margins_1": 1.6937607526779175, "rewards_train/margins_2": 1.7621343731880188, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -186.9095458984375, "logps_train/policy_1_l": -167.92626953125, "logps_train/policy_1_w": -125.60015106201172, "logps_train/policy_2_2": -127.39295959472656, "logps_train/policy_2_w": -172.4682159423828, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -1.2011115550994873, "rewards_train/1-l": -1.032470703125, "rewards_train/1-w": 1.3805118799209595, "rewards_train/2-2": 1.835313081741333, "rewards_train/2-w": -0.9013137221336365, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.4129825830459595, "rewards_train/margins_1": 2.5816234350204468, "rewards_train/margins_2": 2.7366268038749695, "step": 57 }, { "epoch": 0.17, "logps_train/policy_1_2": -199.1376953125, "logps_train/policy_1_l": -190.81381225585938, "logps_train/policy_1_w": -127.20780181884766, "logps_train/policy_2_2": -151.14010620117188, "logps_train/policy_2_w": -173.16429138183594, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -0.7532233595848083, "rewards_train/1-l": -1.9833335876464844, "rewards_train/1-w": 1.6889855861663818, "rewards_train/2-2": 1.8543484210968018, "rewards_train/2-w": -0.25588303804397583, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.672319173812866, "rewards_train/margins_1": 2.44220894575119, "rewards_train/margins_2": 2.1102314591407776, "step": 57 }, { "epoch": 0.17, "learning_rate": 4.982452740033793e-06, "loss": 1.2806, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -163.5117645263672, "logps_train/policy_1_l": -167.69198608398438, "logps_train/policy_1_w": -150.0470428466797, "logps_train/policy_2_2": -122.8832778930664, "logps_train/policy_2_w": -203.51548767089844, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.28047314286231995, "rewards_train/1-l": -1.0902928113937378, "rewards_train/1-w": 1.6363117694854736, "rewards_train/2-2": 1.8477073907852173, "rewards_train/2-w": -1.096469759941101, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.7266045808792114, "rewards_train/margins_1": 1.9167849123477936, "rewards_train/margins_2": 2.9441771507263184, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -218.3140869140625, "logps_train/policy_1_l": -182.35906982421875, "logps_train/policy_1_w": -133.2689666748047, "logps_train/policy_2_2": -157.47201538085938, "logps_train/policy_2_w": -179.90867614746094, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.9189105033874512, "rewards_train/1-l": -1.4946973323822021, "rewards_train/1-w": 1.247933030128479, "rewards_train/2-2": 2.0641260147094727, "rewards_train/2-w": -1.0048326253890991, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.742630362510681, "rewards_train/margins_1": 2.16684353351593, "rewards_train/margins_2": 3.0689586400985718, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -169.72528076171875, "logps_train/policy_1_l": -146.71871948242188, "logps_train/policy_1_w": -144.2645721435547, "logps_train/policy_2_2": -122.71769714355469, "logps_train/policy_2_w": -196.64625549316406, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.7381534576416016, "rewards_train/1-l": -0.9687706232070923, "rewards_train/1-w": 1.7571358680725098, "rewards_train/2-2": 1.6192457675933838, "rewards_train/2-w": -0.6810321807861328, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.725906491279602, "rewards_train/margins_1": 2.4952893257141113, "rewards_train/margins_2": 2.3002779483795166, "step": 58 }, { "epoch": 0.17, "logps_train/policy_1_2": -174.73388671875, "logps_train/policy_1_l": -184.4317169189453, "logps_train/policy_1_w": -160.59750366210938, "logps_train/policy_2_2": -127.41934967041016, "logps_train/policy_2_w": -210.33518981933594, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -0.8687016367912292, "rewards_train/1-l": -1.1815506219863892, "rewards_train/1-w": 1.5855611562728882, "rewards_train/2-2": 1.2650964260101318, "rewards_train/2-w": -0.7272688150405884, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.7671117782592773, "rewards_train/margins_1": 2.4542627930641174, "rewards_train/margins_2": 1.9923652410507202, "step": 58 }, { "epoch": 0.18, "logps_train/policy_1_2": -155.87896728515625, "logps_train/policy_1_l": -149.66775512695312, "logps_train/policy_1_w": -105.76326751708984, "logps_train/policy_2_2": -114.5276870727539, "logps_train/policy_2_w": -150.14230346679688, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.5828181505203247, "rewards_train/1-l": -1.5740991830825806, "rewards_train/1-w": 1.1867594718933105, "rewards_train/2-2": 1.453481674194336, "rewards_train/2-w": -0.9603245258331299, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.760858654975891, "rewards_train/margins_1": 1.7695776224136353, "rewards_train/margins_2": 2.413806200027466, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -214.9193115234375, "logps_train/policy_1_l": -167.12905883789062, "logps_train/policy_1_w": -163.68975830078125, "logps_train/policy_2_2": -165.52117919921875, "logps_train/policy_2_w": -226.24639892578125, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.8122439384460449, "rewards_train/1-l": -1.6400527954101562, "rewards_train/1-w": 1.4863958358764648, "rewards_train/2-2": 1.7341125011444092, "rewards_train/2-w": -1.4578430652618408, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.126448631286621, "rewards_train/margins_1": 2.2986397743225098, "rewards_train/margins_2": 3.19195556640625, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -122.04054260253906, "logps_train/policy_1_l": -150.68637084960938, "logps_train/policy_1_w": -117.0896224975586, "logps_train/policy_2_2": -93.84215545654297, "logps_train/policy_2_w": -162.21875, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": 0.07621902227401733, "rewards_train/1-l": -1.3552824258804321, "rewards_train/1-w": 1.7464823722839355, "rewards_train/2-2": 1.5175423622131348, "rewards_train/2-w": -0.4035639762878418, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.1017647981643677, "rewards_train/margins_1": 1.6702633500099182, "rewards_train/margins_2": 1.9211063385009766, "step": 59 }, { "epoch": 0.18, "logps_train/policy_1_2": -228.44203186035156, "logps_train/policy_1_l": -189.474365234375, "logps_train/policy_1_w": -159.88946533203125, "logps_train/policy_2_2": -161.86358642578125, "logps_train/policy_2_w": -224.89974975585938, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.3094396591186523, "rewards_train/1-l": -2.061499834060669, "rewards_train/1-w": 2.2419137954711914, "rewards_train/2-2": 2.051629066467285, "rewards_train/2-w": -0.6707382202148438, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.30341362953186, "rewards_train/margins_1": 3.5513534545898438, "rewards_train/margins_2": 2.722367286682129, "step": 59 }, { "epoch": 0.18, "learning_rate": 4.979410528710376e-06, "loss": 1.2793, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -184.65798950195312, "logps_train/policy_1_l": -202.58868408203125, "logps_train/policy_1_w": -167.15574645996094, "logps_train/policy_2_2": -144.56771850585938, "logps_train/policy_2_w": -215.0994873046875, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.5504189729690552, "rewards_train/1-l": -1.516194224357605, "rewards_train/1-w": 1.8199231624603271, "rewards_train/2-2": 1.5793604850769043, "rewards_train/2-w": -0.24998651444911957, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.336117386817932, "rewards_train/margins_1": 2.3703421354293823, "rewards_train/margins_2": 1.8293469995260239, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -192.77279663085938, "logps_train/policy_1_l": -192.42132568359375, "logps_train/policy_1_w": -166.2535400390625, "logps_train/policy_2_2": -145.587890625, "logps_train/policy_2_w": -220.03140258789062, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.2124348878860474, "rewards_train/1-l": -2.3466227054595947, "rewards_train/1-w": 1.2922241687774658, "rewards_train/2-2": 1.3544926643371582, "rewards_train/2-w": -1.4871233701705933, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.6388468742370605, "rewards_train/margins_1": 2.504659056663513, "rewards_train/margins_2": 2.8416160345077515, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -269.47137451171875, "logps_train/policy_1_l": -248.87416076660156, "logps_train/policy_1_w": -205.83816528320312, "logps_train/policy_2_2": -205.35598754882812, "logps_train/policy_2_w": -288.1234130859375, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -243.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -282.0, "rewards_train/1-2": -0.8740885853767395, "rewards_train/1-l": -2.0723769664764404, "rewards_train/1-w": 3.700559616088867, "rewards_train/2-2": 2.5111775398254395, "rewards_train/2-w": -0.6428077220916748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.772936582565308, "rewards_train/margins_1": 4.574648201465607, "rewards_train/margins_2": 3.1539852619171143, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -212.3658447265625, "logps_train/policy_1_l": -207.11614990234375, "logps_train/policy_1_w": -164.09115600585938, "logps_train/policy_2_2": -166.87535095214844, "logps_train/policy_2_w": -219.23318481445312, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -0.6443965435028076, "rewards_train/1-l": -1.5114185810089111, "rewards_train/1-w": 2.3510398864746094, "rewards_train/2-2": 1.5685195922851562, "rewards_train/2-w": -0.3043731451034546, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8624584674835205, "rewards_train/margins_1": 2.995436429977417, "rewards_train/margins_2": 1.8728927373886108, "step": 60 }, { "epoch": 0.18, "logps_train/policy_1_2": -270.6756591796875, "logps_train/policy_1_l": -261.52789306640625, "logps_train/policy_1_w": -184.46734619140625, "logps_train/policy_2_2": -198.60231018066406, "logps_train/policy_2_w": -252.28631591796875, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": -1.0464708805084229, "rewards_train/1-l": -2.498199939727783, "rewards_train/1-w": 2.3560006618499756, "rewards_train/2-2": 2.410862922668457, "rewards_train/2-w": -1.3368353843688965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.854200601577759, "rewards_train/margins_1": 3.4024715423583984, "rewards_train/margins_2": 3.7476983070373535, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -150.35098266601562, "logps_train/policy_1_l": -122.82208251953125, "logps_train/policy_1_w": -127.15911102294922, "logps_train/policy_2_2": -116.27395629882812, "logps_train/policy_2_w": -163.3336181640625, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": 0.026621520519256592, "rewards_train/1-l": -0.9056456089019775, "rewards_train/1-w": 1.7184642553329468, "rewards_train/2-2": 1.6765108108520508, "rewards_train/2-w": -0.19195556640625, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6241098642349243, "rewards_train/margins_1": 1.6918427348136902, "rewards_train/margins_2": 1.8684663772583008, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -128.57720947265625, "logps_train/policy_1_l": -130.03604125976562, "logps_train/policy_1_w": -84.76609802246094, "logps_train/policy_2_2": -96.59211730957031, "logps_train/policy_2_w": -123.01612091064453, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": -0.38740813732147217, "rewards_train/1-l": -1.1416908502578735, "rewards_train/1-w": 1.3817880153656006, "rewards_train/2-2": 1.1849288940429688, "rewards_train/2-w": -0.17622090876102448, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.523478865623474, "rewards_train/margins_1": 1.7691961526870728, "rewards_train/margins_2": 1.3611498028039932, "step": 61 }, { "epoch": 0.18, "logps_train/policy_1_2": -199.18133544921875, "logps_train/policy_1_l": -165.88299560546875, "logps_train/policy_1_w": -138.15533447265625, "logps_train/policy_2_2": -153.86703491210938, "logps_train/policy_2_w": -186.10565185546875, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -0.6435237526893616, "rewards_train/1-l": -1.361931562423706, "rewards_train/1-w": 1.435248851776123, "rewards_train/2-2": 1.6160317659378052, "rewards_train/2-w": -1.076581597328186, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.797180414199829, "rewards_train/margins_1": 2.0787726044654846, "rewards_train/margins_2": 2.692613363265991, "step": 61 }, { "epoch": 0.19, "learning_rate": 4.97612633129029e-06, "loss": 1.2102, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -176.91224670410156, "logps_train/policy_1_l": -141.07473754882812, "logps_train/policy_1_w": -125.69603729248047, "logps_train/policy_2_2": -129.07040405273438, "logps_train/policy_2_w": -177.7951202392578, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.8220834732055664, "rewards_train/1-l": -1.5016155242919922, "rewards_train/1-w": 1.3936771154403687, "rewards_train/2-2": 1.5546772480010986, "rewards_train/2-w": -1.1252152919769287, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.895292639732361, "rewards_train/margins_1": 2.215760588645935, "rewards_train/margins_2": 2.6798925399780273, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -231.33059692382812, "logps_train/policy_1_l": -166.8428955078125, "logps_train/policy_1_w": -212.96783447265625, "logps_train/policy_2_2": -173.66233825683594, "logps_train/policy_2_w": -299.80987548828125, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -244.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -290.0, "rewards_train/1-2": -1.4500995874404907, "rewards_train/1-l": -1.1198347806930542, "rewards_train/1-w": 3.0564873218536377, "rewards_train/2-2": 1.5067154169082642, "rewards_train/2-w": -0.9818675518035889, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.176322102546692, "rewards_train/margins_1": 4.506586909294128, "rewards_train/margins_2": 2.488582968711853, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -180.294921875, "logps_train/policy_1_l": -185.1522216796875, "logps_train/policy_1_w": -118.35958862304688, "logps_train/policy_2_2": -134.18833923339844, "logps_train/policy_2_w": -169.3937530517578, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.772071123123169, "rewards_train/1-l": -1.5302619934082031, "rewards_train/1-w": 1.4370884895324707, "rewards_train/2-2": 1.6693493127822876, "rewards_train/2-w": -0.9967971444129944, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.967350482940674, "rewards_train/margins_1": 2.2091596126556396, "rewards_train/margins_2": 2.666146457195282, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -186.7953643798828, "logps_train/policy_1_l": -185.018310546875, "logps_train/policy_1_w": -181.14462280273438, "logps_train/policy_2_2": -155.2574920654297, "logps_train/policy_2_w": -220.99847412109375, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": 0.11890139430761337, "rewards_train/1-l": -1.3321034908294678, "rewards_train/1-w": 1.8740140199661255, "rewards_train/2-2": 1.6834306716918945, "rewards_train/2-w": -0.09359710663557053, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2061175107955933, "rewards_train/margins_1": 1.7551126256585121, "rewards_train/margins_2": 1.777027778327465, "step": 62 }, { "epoch": 0.19, "logps_train/policy_1_2": -175.74459838867188, "logps_train/policy_1_l": -141.07833862304688, "logps_train/policy_1_w": -109.72785949707031, "logps_train/policy_2_2": -128.36724853515625, "logps_train/policy_2_w": -148.0393524169922, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.9861778020858765, "rewards_train/1-l": -1.1264137029647827, "rewards_train/1-w": 1.5414714813232422, "rewards_train/2-2": 1.2812448740005493, "rewards_train/2-w": -0.42854562401771545, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.667885184288025, "rewards_train/margins_1": 2.5276492834091187, "rewards_train/margins_2": 1.7097904980182648, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -131.147216796875, "logps_train/policy_1_l": -74.74335479736328, "logps_train/policy_1_w": -97.38616943359375, "logps_train/policy_2_2": -90.15480041503906, "logps_train/policy_2_w": -138.35202026367188, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -69.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -0.8627696633338928, "rewards_train/1-l": -0.5684155225753784, "rewards_train/1-w": 1.3114442825317383, "rewards_train/2-2": 1.060105562210083, "rewards_train/2-w": -0.8247165083885193, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.8798598051071167, "rewards_train/margins_1": 2.174213945865631, "rewards_train/margins_2": 1.8848220705986023, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -125.23835754394531, "logps_train/policy_1_l": -102.91542053222656, "logps_train/policy_1_w": -92.57563781738281, "logps_train/policy_2_2": -93.10368347167969, "logps_train/policy_2_w": -128.7369384765625, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": -0.42383629083633423, "rewards_train/1-l": -0.8974504470825195, "rewards_train/1-w": 1.1946830749511719, "rewards_train/2-2": 1.3120931386947632, "rewards_train/2-w": -0.47721073031425476, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.0921335220336914, "rewards_train/margins_1": 1.618519365787506, "rewards_train/margins_2": 1.789303869009018, "step": 63 }, { "epoch": 0.19, "logps_train/policy_1_2": -197.79656982421875, "logps_train/policy_1_l": -187.30322265625, "logps_train/policy_1_w": -119.24455261230469, "logps_train/policy_2_2": -152.93112182617188, "logps_train/policy_2_w": -168.5566864013672, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.21637707948684692, "rewards_train/1-l": -1.4334468841552734, "rewards_train/1-w": 1.4691001176834106, "rewards_train/2-2": 1.8889185190200806, "rewards_train/2-w": -0.9712939262390137, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.902547001838684, "rewards_train/margins_1": 1.6854771971702576, "rewards_train/margins_2": 2.8602124452590942, "step": 63 }, { "epoch": 0.19, "learning_rate": 4.9726004683054105e-06, "loss": 1.2594, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -147.02951049804688, "logps_train/policy_1_l": -152.34146118164062, "logps_train/policy_1_w": -135.77938842773438, "logps_train/policy_2_2": -102.61683654785156, "logps_train/policy_2_w": -185.6272735595703, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -0.5658419132232666, "rewards_train/1-l": -1.4710606336593628, "rewards_train/1-w": 1.8527263402938843, "rewards_train/2-2": 1.3545269966125488, "rewards_train/2-w": -0.8772777318954468, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.323786973953247, "rewards_train/margins_1": 2.418568253517151, "rewards_train/margins_2": 2.2318047285079956, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -234.07318115234375, "logps_train/policy_1_l": -213.59912109375, "logps_train/policy_1_w": -165.49566650390625, "logps_train/policy_2_2": -183.04367065429688, "logps_train/policy_2_w": -208.77178955078125, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -0.03466269373893738, "rewards_train/1-l": -1.7316889762878418, "rewards_train/1-w": 1.9674266576766968, "rewards_train/2-2": 2.439383029937744, "rewards_train/2-w": -0.15120112895965576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.6991156339645386, "rewards_train/margins_1": 2.002089351415634, "rewards_train/margins_2": 2.5905841588974, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -179.98678588867188, "logps_train/policy_1_l": -191.6248321533203, "logps_train/policy_1_w": -160.68365478515625, "logps_train/policy_2_2": -144.92063903808594, "logps_train/policy_2_w": -201.52947998046875, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": 0.048392727971076965, "rewards_train/1-l": -1.6921712160110474, "rewards_train/1-w": 2.0042903423309326, "rewards_train/2-2": 1.9537371397018433, "rewards_train/2-w": -0.07365107536315918, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.69646155834198, "rewards_train/margins_1": 1.9558976143598557, "rewards_train/margins_2": 2.0273882150650024, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -176.26211547851562, "logps_train/policy_1_l": -218.69020080566406, "logps_train/policy_1_w": -154.56690979003906, "logps_train/policy_2_2": -137.84194946289062, "logps_train/policy_2_w": -193.07135009765625, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 0.03589847683906555, "rewards_train/1-l": -2.2524189949035645, "rewards_train/1-w": 1.694870948791504, "rewards_train/2-2": 1.9341648817062378, "rewards_train/2-w": -0.2657291293144226, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9472899436950684, "rewards_train/margins_1": 1.6589724719524384, "rewards_train/margins_2": 2.1998940110206604, "step": 64 }, { "epoch": 0.19, "logps_train/policy_1_2": -161.07705688476562, "logps_train/policy_1_l": -127.59225463867188, "logps_train/policy_1_w": -138.15196228027344, "logps_train/policy_2_2": -113.91970825195312, "logps_train/policy_2_w": -191.77999877929688, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.39950376749038696, "rewards_train/1-l": -1.4100066423416138, "rewards_train/1-w": 2.6137099266052246, "rewards_train/2-2": 1.9259977340698242, "rewards_train/2-w": -0.20065739750862122, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.023716568946838, "rewards_train/margins_1": 3.0132136940956116, "rewards_train/margins_2": 2.1266551315784454, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -88.80239868164062, "logps_train/policy_1_l": -73.11553192138672, "logps_train/policy_1_w": -62.48269271850586, "logps_train/policy_2_2": -68.53363037109375, "logps_train/policy_2_w": -81.28339385986328, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -69.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -77.0, "rewards_train/1-2": -0.12622344493865967, "rewards_train/1-l": -0.42336970567703247, "rewards_train/1-w": 0.6431370973587036, "rewards_train/2-2": 0.5745822787284851, "rewards_train/2-w": -0.4015814960002899, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.066506803035736, "rewards_train/margins_1": 0.7693605422973633, "rewards_train/margins_2": 0.976163774728775, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -180.7008056640625, "logps_train/policy_1_l": -189.20538330078125, "logps_train/policy_1_w": -137.12762451171875, "logps_train/policy_2_2": -132.88681030273438, "logps_train/policy_2_w": -183.43988037109375, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -0.8888298273086548, "rewards_train/1-l": -1.7108219861984253, "rewards_train/1-w": 1.5898252725601196, "rewards_train/2-2": 1.7190337181091309, "rewards_train/2-w": -0.3980405926704407, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.300647258758545, "rewards_train/margins_1": 2.4786550998687744, "rewards_train/margins_2": 2.1170743107795715, "step": 65 }, { "epoch": 0.19, "logps_train/policy_1_2": -159.29437255859375, "logps_train/policy_1_l": -141.1519775390625, "logps_train/policy_1_w": -122.52241516113281, "logps_train/policy_2_2": -121.4588851928711, "logps_train/policy_2_w": -169.2248992919922, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.07396787405014038, "rewards_train/1-l": -1.185827374458313, "rewards_train/1-w": 1.980179786682129, "rewards_train/2-2": 1.684188961982727, "rewards_train/2-w": -0.5357708930969238, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.166007161140442, "rewards_train/margins_1": 2.0541476607322693, "rewards_train/margins_2": 2.219959855079651, "step": 65 }, { "epoch": 0.2, "learning_rate": 4.96883328387375e-06, "loss": 1.3851, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -191.03955078125, "logps_train/policy_1_l": -150.73080444335938, "logps_train/policy_1_w": -132.69825744628906, "logps_train/policy_2_2": -142.30728149414062, "logps_train/policy_2_w": -181.74224853515625, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.3800771236419678, "rewards_train/1-l": -0.9464700818061829, "rewards_train/1-w": 1.414647102355957, "rewards_train/2-2": 1.9971516132354736, "rewards_train/2-w": -0.8489819765090942, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.36111718416214, "rewards_train/margins_1": 1.7947242259979248, "rewards_train/margins_2": 2.846133589744568, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -140.14675903320312, "logps_train/policy_1_l": -139.7452392578125, "logps_train/policy_1_w": -162.3834686279297, "logps_train/policy_2_2": -95.21894073486328, "logps_train/policy_2_w": -235.49293518066406, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -0.8404573202133179, "rewards_train/1-l": -1.2773561477661133, "rewards_train/1-w": 2.0098955631256104, "rewards_train/2-2": 1.3257615566253662, "rewards_train/2-w": -1.5274192094802856, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.2872517108917236, "rewards_train/margins_1": 2.8503528833389282, "rewards_train/margins_2": 2.853180766105652, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -175.02259826660156, "logps_train/policy_1_l": -160.0053253173828, "logps_train/policy_1_w": -146.40518188476562, "logps_train/policy_2_2": -135.35775756835938, "logps_train/policy_2_w": -187.02764892578125, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 0.32781723141670227, "rewards_train/1-l": -1.4815387725830078, "rewards_train/1-w": 2.1047935485839844, "rewards_train/2-2": 2.1489906311035156, "rewards_train/2-w": 0.26051610708236694, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.586332321166992, "rewards_train/margins_1": 1.776976317167282, "rewards_train/margins_2": 1.8884745240211487, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -198.73536682128906, "logps_train/policy_1_l": -154.73529052734375, "logps_train/policy_1_w": -166.5842742919922, "logps_train/policy_2_2": -145.7459716796875, "logps_train/policy_2_w": -213.74185180664062, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -0.5047869682312012, "rewards_train/1-l": -0.7821714878082275, "rewards_train/1-w": 1.783759355545044, "rewards_train/2-2": 2.3409910202026367, "rewards_train/2-w": -0.5374646186828613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.5659308433532715, "rewards_train/margins_1": 2.288546323776245, "rewards_train/margins_2": 2.878455638885498, "step": 66 }, { "epoch": 0.2, "logps_train/policy_1_2": -135.235107421875, "logps_train/policy_1_l": -117.03594970703125, "logps_train/policy_1_w": -97.11473083496094, "logps_train/policy_2_2": -104.61945343017578, "logps_train/policy_2_w": -128.3675537109375, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": -0.16442950069904327, "rewards_train/1-l": -0.6730287075042725, "rewards_train/1-w": 1.0882086753845215, "rewards_train/2-2": 1.389519453048706, "rewards_train/2-w": -0.43597412109375, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 1.761237382888794, "rewards_train/margins_1": 1.2526381760835648, "rewards_train/margins_2": 1.825493574142456, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -81.09040832519531, "logps_train/policy_1_l": -82.23281860351562, "logps_train/policy_1_w": -93.3329849243164, "logps_train/policy_2_2": -63.37489318847656, "logps_train/policy_2_w": -123.55752563476562, "logps_train/ref_1_2": -79.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -69.5, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": -0.1981036365032196, "rewards_train/1-l": -0.96527498960495, "rewards_train/1-w": 1.2873070240020752, "rewards_train/2-2": 0.5927841663360596, "rewards_train/2-w": -0.3393462896347046, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.252582013607025, "rewards_train/margins_1": 1.4854106605052948, "rewards_train/margins_2": 0.9321304559707642, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -110.56976318359375, "logps_train/policy_1_l": -132.4513702392578, "logps_train/policy_1_w": -112.37257385253906, "logps_train/policy_2_2": -86.31550598144531, "logps_train/policy_2_w": -146.63597106933594, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.17123448848724365, "rewards_train/1-l": -0.863935649394989, "rewards_train/1-w": 1.277195930480957, "rewards_train/2-2": 0.9625901579856873, "rewards_train/2-w": -0.2143779993057251, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.141131579875946, "rewards_train/margins_1": 1.4484304189682007, "rewards_train/margins_2": 1.1769681572914124, "step": 67 }, { "epoch": 0.2, "logps_train/policy_1_2": -136.86172485351562, "logps_train/policy_1_l": -149.25714111328125, "logps_train/policy_1_w": -108.27645874023438, "logps_train/policy_2_2": -112.84403228759766, "logps_train/policy_2_w": -141.95962524414062, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": 0.016659080982208252, "rewards_train/1-l": -1.2794241905212402, "rewards_train/1-w": 1.166885495185852, "rewards_train/2-2": 1.2435264587402344, "rewards_train/2-w": -0.034831345081329346, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4463096857070923, "rewards_train/margins_1": 1.1502264142036438, "rewards_train/margins_2": 1.2783578038215637, "step": 67 }, { "epoch": 0.2, "learning_rate": 4.96482514566587e-06, "loss": 1.4047, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -142.822021484375, "logps_train/policy_1_l": -138.759033203125, "logps_train/policy_1_w": -114.16099548339844, "logps_train/policy_2_2": -104.14376068115234, "logps_train/policy_2_w": -154.3915557861328, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -0.9579839706420898, "rewards_train/1-l": -1.2815687656402588, "rewards_train/1-w": 0.9881977438926697, "rewards_train/2-2": 0.9981241226196289, "rewards_train/2-w": -0.938374400138855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.2697665095329285, "rewards_train/margins_1": 1.9461817145347595, "rewards_train/margins_2": 1.9364985227584839, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -122.65005493164062, "logps_train/policy_1_l": -109.71295166015625, "logps_train/policy_1_w": -108.22978210449219, "logps_train/policy_2_2": -91.60769653320312, "logps_train/policy_2_w": -139.37139892578125, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -0.6332187056541443, "rewards_train/1-l": -0.6009823083877563, "rewards_train/1-w": 1.0332231521606445, "rewards_train/2-2": 1.0484585762023926, "rewards_train/2-w": -0.5903639197349548, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.6342054605484009, "rewards_train/margins_1": 1.6664418578147888, "rewards_train/margins_2": 1.6388224959373474, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -124.14806365966797, "logps_train/policy_1_l": -99.73138427734375, "logps_train/policy_1_w": -95.76863098144531, "logps_train/policy_2_2": -91.89981079101562, "logps_train/policy_2_w": -129.80502319335938, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -0.16285327076911926, "rewards_train/1-l": -1.1255799531936646, "rewards_train/1-w": 1.4274336099624634, "rewards_train/2-2": 1.5963466167449951, "rewards_train/2-w": -0.067220538854599, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.553013563156128, "rewards_train/margins_1": 1.5902868807315826, "rewards_train/margins_2": 1.6635671555995941, "step": 68 }, { "epoch": 0.2, "logps_train/policy_1_2": -195.422119140625, "logps_train/policy_1_l": -124.21659851074219, "logps_train/policy_1_w": -121.60774993896484, "logps_train/policy_2_2": -136.4685821533203, "logps_train/policy_2_w": -163.33473205566406, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.0617424249649048, "rewards_train/1-l": -0.9993455410003662, "rewards_train/1-w": 1.8013347387313843, "rewards_train/2-2": 1.8672044277191162, "rewards_train/2-w": -0.33874672651290894, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.8006802797317505, "rewards_train/margins_1": 2.863077163696289, "rewards_train/margins_2": 2.205951154232025, "step": 68 }, { "epoch": 0.21, "logps_train/policy_1_2": -158.83865356445312, "logps_train/policy_1_l": -143.7088623046875, "logps_train/policy_1_w": -110.47440338134766, "logps_train/policy_2_2": -123.00176239013672, "logps_train/policy_2_w": -154.4036865234375, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.5699971914291382, "rewards_train/1-l": -1.4368340969085693, "rewards_train/1-w": 1.5635645389556885, "rewards_train/2-2": 1.3511906862258911, "rewards_train/2-w": -0.7931025624275208, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.000398635864258, "rewards_train/margins_1": 2.1335617303848267, "rewards_train/margins_2": 2.144293248653412, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -240.41973876953125, "logps_train/policy_1_l": -192.61734008789062, "logps_train/policy_1_w": -173.07302856445312, "logps_train/policy_2_2": -184.85275268554688, "logps_train/policy_2_w": -219.4365692138672, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -0.7380690574645996, "rewards_train/1-l": -1.5643705129623413, "rewards_train/1-w": 2.0282442569732666, "rewards_train/2-2": 2.18855357170105, "rewards_train/2-w": -0.34600114822387695, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.592614769935608, "rewards_train/margins_1": 2.766313314437866, "rewards_train/margins_2": 2.5345547199249268, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -121.0333023071289, "logps_train/policy_1_l": -76.45564270019531, "logps_train/policy_1_w": -96.83895874023438, "logps_train/policy_2_2": -84.56678009033203, "logps_train/policy_2_w": -137.1509246826172, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -0.09747056663036346, "rewards_train/1-l": -0.6490792036056519, "rewards_train/1-w": 1.3965730667114258, "rewards_train/2-2": 1.601623296737671, "rewards_train/2-w": -0.6790568232536316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.0456522703170776, "rewards_train/margins_1": 1.4940436333417892, "rewards_train/margins_2": 2.2806801199913025, "step": 69 }, { "epoch": 0.21, "logps_train/policy_1_2": -215.30934143066406, "logps_train/policy_1_l": -156.1685333251953, "logps_train/policy_1_w": -154.33447265625, "logps_train/policy_2_2": -168.43394470214844, "logps_train/policy_2_w": -214.51242065429688, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -0.44577834010124207, "rewards_train/1-l": -1.0303549766540527, "rewards_train/1-w": 2.505614757537842, "rewards_train/2-2": 2.124574661254883, "rewards_train/2-w": -0.7059302926063538, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5359697341918945, "rewards_train/margins_1": 2.951393097639084, "rewards_train/margins_2": 2.8305049538612366, "step": 69 }, { "epoch": 0.21, "learning_rate": 4.960576444868992e-06, "loss": 1.2602, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -91.87825012207031, "logps_train/policy_1_l": -121.40019989013672, "logps_train/policy_1_w": -99.60694885253906, "logps_train/policy_2_2": -73.2916259765625, "logps_train/policy_2_w": -135.459228515625, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": 0.4418630599975586, "rewards_train/1-l": -0.9601374268531799, "rewards_train/1-w": 1.508836269378662, "rewards_train/2-2": 1.2190793752670288, "rewards_train/2-w": -0.2533436119556427, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.468973696231842, "rewards_train/margins_1": 1.0669732093811035, "rewards_train/margins_2": 1.4724229872226715, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -171.06683349609375, "logps_train/policy_1_l": -144.58267211914062, "logps_train/policy_1_w": -102.08815002441406, "logps_train/policy_2_2": -128.96624755859375, "logps_train/policy_2_w": -142.97402954101562, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -0.5098086595535278, "rewards_train/1-l": -1.458852767944336, "rewards_train/1-w": 1.416869044303894, "rewards_train/2-2": 1.6028873920440674, "rewards_train/2-w": -0.6071687936782837, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.87572181224823, "rewards_train/margins_1": 1.9266777038574219, "rewards_train/margins_2": 2.210056185722351, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -196.1415557861328, "logps_train/policy_1_l": -155.248046875, "logps_train/policy_1_w": -165.6074676513672, "logps_train/policy_2_2": -167.1851806640625, "logps_train/policy_2_w": -194.21392822265625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 0.12129370123147964, "rewards_train/1-l": -0.7561516165733337, "rewards_train/1-w": 1.8657176494598389, "rewards_train/2-2": 1.5480830669403076, "rewards_train/2-w": 0.18856848776340485, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.6218692660331726, "rewards_train/margins_1": 1.7444239482283592, "rewards_train/margins_2": 1.3595145791769028, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -188.70498657226562, "logps_train/policy_1_l": -227.17042541503906, "logps_train/policy_1_w": -138.76089477539062, "logps_train/policy_2_2": -135.4248809814453, "logps_train/policy_2_w": -184.34429931640625, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.9513593316078186, "rewards_train/1-l": -1.8939955234527588, "rewards_train/1-w": 1.7450037002563477, "rewards_train/2-2": 1.943839430809021, "rewards_train/2-w": -0.151813343167305, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6389992237091064, "rewards_train/margins_1": 2.6963630318641663, "rewards_train/margins_2": 2.095652773976326, "step": 70 }, { "epoch": 0.21, "logps_train/policy_1_2": -232.58795166015625, "logps_train/policy_1_l": -202.86671447753906, "logps_train/policy_1_w": -115.96427917480469, "logps_train/policy_2_2": -164.98849487304688, "logps_train/policy_2_w": -165.7715606689453, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.1259821653366089, "rewards_train/1-l": -1.6005390882492065, "rewards_train/1-w": 1.7066975831985474, "rewards_train/2-2": 2.536306858062744, "rewards_train/2-w": -0.6994211077690125, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.307236671447754, "rewards_train/margins_1": 2.8326797485351562, "rewards_train/margins_2": 3.2357279658317566, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -165.40264892578125, "logps_train/policy_1_l": -142.17295837402344, "logps_train/policy_1_w": -127.28094482421875, "logps_train/policy_2_2": -125.9563217163086, "logps_train/policy_2_w": -165.81683349609375, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.13284389674663544, "rewards_train/1-l": -1.3471543788909912, "rewards_train/1-w": 1.3031548261642456, "rewards_train/2-2": 1.8678441047668457, "rewards_train/2-w": -0.5992618799209595, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.650309205055237, "rewards_train/margins_1": 1.435998722910881, "rewards_train/margins_2": 2.467105984687805, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -271.2712097167969, "logps_train/policy_1_l": -232.35804748535156, "logps_train/policy_1_w": -209.19459533691406, "logps_train/policy_2_2": -209.05361938476562, "logps_train/policy_2_w": -283.70782470703125, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -239.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": -1.1056382656097412, "rewards_train/1-l": -2.284242630004883, "rewards_train/1-w": 2.996556282043457, "rewards_train/2-2": 2.213047981262207, "rewards_train/2-w": -1.129377007484436, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.28079891204834, "rewards_train/margins_1": 4.102194547653198, "rewards_train/margins_2": 3.342424988746643, "step": 71 }, { "epoch": 0.21, "logps_train/policy_1_2": -200.5442352294922, "logps_train/policy_1_l": -190.0338134765625, "logps_train/policy_1_w": -140.68243408203125, "logps_train/policy_2_2": -145.86038208007812, "logps_train/policy_2_w": -200.5244140625, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.06965763866901398, "rewards_train/1-l": -1.3660778999328613, "rewards_train/1-w": 1.5962104797363281, "rewards_train/2-2": 2.519235134124756, "rewards_train/2-w": -1.248927354812622, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.9622883796691895, "rewards_train/margins_1": 1.665868118405342, "rewards_train/margins_2": 3.768162488937378, "step": 71 }, { "epoch": 0.22, "learning_rate": 4.956087596148824e-06, "loss": 1.261, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -207.36367797851562, "logps_train/policy_1_l": -177.5745086669922, "logps_train/policy_1_w": -137.4600830078125, "logps_train/policy_2_2": -158.11058044433594, "logps_train/policy_2_w": -187.78897094726562, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.120937466621399, "rewards_train/1-l": -1.185502529144287, "rewards_train/1-w": 1.7184460163116455, "rewards_train/2-2": 1.411013126373291, "rewards_train/2-w": -0.5605374574661255, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.9039485454559326, "rewards_train/margins_1": 2.8393834829330444, "rewards_train/margins_2": 1.9715505838394165, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -200.35162353515625, "logps_train/policy_1_l": -168.16476440429688, "logps_train/policy_1_w": -188.53619384765625, "logps_train/policy_2_2": -149.97747802734375, "logps_train/policy_2_w": -249.20419311523438, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": -0.9309319853782654, "rewards_train/1-l": -1.4150310754776, "rewards_train/1-w": 1.9307544231414795, "rewards_train/2-2": 1.6682194471359253, "rewards_train/2-w": -1.3215928077697754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3457854986190796, "rewards_train/margins_1": 2.861686408519745, "rewards_train/margins_2": 2.9898122549057007, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -162.92938232421875, "logps_train/policy_1_l": -158.67181396484375, "logps_train/policy_1_w": -136.1348876953125, "logps_train/policy_2_2": -123.70616149902344, "logps_train/policy_2_w": -179.8367462158203, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.6384457349777222, "rewards_train/1-l": -0.9530388116836548, "rewards_train/1-w": 1.557215690612793, "rewards_train/2-2": 1.2816295623779297, "rewards_train/2-w": -0.5417795181274414, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.5102545022964478, "rewards_train/margins_1": 2.195661425590515, "rewards_train/margins_2": 1.823409080505371, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -107.44205474853516, "logps_train/policy_1_l": -166.6825714111328, "logps_train/policy_1_w": -107.00581359863281, "logps_train/policy_2_2": -82.0360336303711, "logps_train/policy_2_w": -136.84268188476562, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.3535807430744171, "rewards_train/1-l": -1.4741160869598389, "rewards_train/1-w": 1.1166059970855713, "rewards_train/2-2": 0.8342872262001038, "rewards_train/2-w": -0.2264561951160431, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.59072208404541, "rewards_train/margins_1": 1.4701867401599884, "rewards_train/margins_2": 1.0607434213161469, "step": 72 }, { "epoch": 0.22, "logps_train/policy_1_2": -134.13644409179688, "logps_train/policy_1_l": -98.48831176757812, "logps_train/policy_1_w": -93.83493041992188, "logps_train/policy_2_2": -96.28926086425781, "logps_train/policy_2_w": -124.08466339111328, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": -0.7780964374542236, "rewards_train/1-l": -0.9972679615020752, "rewards_train/1-w": 0.9378935098648071, "rewards_train/2-2": 1.0544726848602295, "rewards_train/2-w": -0.34830987453460693, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.9351614713668823, "rewards_train/margins_1": 1.7159899473190308, "rewards_train/margins_2": 1.4027825593948364, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -209.16073608398438, "logps_train/policy_1_l": -214.9116668701172, "logps_train/policy_1_w": -187.32089233398438, "logps_train/policy_2_2": -162.0082550048828, "logps_train/policy_2_w": -240.25357055664062, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -233.0, "rewards_train/1-2": -0.15826022624969482, "rewards_train/1-l": -1.5776913166046143, "rewards_train/1-w": 2.028848886489868, "rewards_train/2-2": 2.1776905059814453, "rewards_train/2-w": -0.7269192337989807, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6065402030944824, "rewards_train/margins_1": 2.187109112739563, "rewards_train/margins_2": 2.904609739780426, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -189.62335205078125, "logps_train/policy_1_l": -187.6962432861328, "logps_train/policy_1_w": -126.31449890136719, "logps_train/policy_2_2": -131.315673828125, "logps_train/policy_2_w": -181.51788330078125, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.4304990768432617, "rewards_train/1-l": -1.5661324262619019, "rewards_train/1-w": 1.6544876098632812, "rewards_train/2-2": 1.6381094455718994, "rewards_train/2-w": -0.8565976619720459, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.220620036125183, "rewards_train/margins_1": 3.084986686706543, "rewards_train/margins_2": 2.4947071075439453, "step": 73 }, { "epoch": 0.22, "logps_train/policy_1_2": -165.64697265625, "logps_train/policy_1_l": -194.62054443359375, "logps_train/policy_1_w": -111.98668670654297, "logps_train/policy_2_2": -121.3204345703125, "logps_train/policy_2_w": -157.2623291015625, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -0.8955574035644531, "rewards_train/1-l": -2.05092191696167, "rewards_train/1-w": 1.3608529567718506, "rewards_train/2-2": 1.356628179550171, "rewards_train/2-w": -0.7445927858352661, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.4117748737335205, "rewards_train/margins_1": 2.2564103603363037, "rewards_train/margins_2": 2.101220965385437, "step": 73 }, { "epoch": 0.22, "learning_rate": 4.951359037609088e-06, "loss": 1.3245, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -144.14242553710938, "logps_train/policy_1_l": -157.86746215820312, "logps_train/policy_1_w": -139.21746826171875, "logps_train/policy_2_2": -104.00602722167969, "logps_train/policy_2_w": -198.65048217773438, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -0.833284854888916, "rewards_train/1-l": -1.433914065361023, "rewards_train/1-w": 1.7259085178375244, "rewards_train/2-2": 1.2162425518035889, "rewards_train/2-w": -1.3351662158966064, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.1598225831985474, "rewards_train/margins_1": 2.5591933727264404, "rewards_train/margins_2": 2.5514087677001953, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -218.4867706298828, "logps_train/policy_1_l": -241.78280639648438, "logps_train/policy_1_w": -136.03607177734375, "logps_train/policy_2_2": -154.37722778320312, "logps_train/policy_2_w": -197.621337890625, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.5454548597335815, "rewards_train/1-l": -2.379843235015869, "rewards_train/1-w": 2.119000196456909, "rewards_train/2-2": 2.5210416316986084, "rewards_train/2-w": -1.1125240325927734, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.498843431472778, "rewards_train/margins_1": 2.6644550561904907, "rewards_train/margins_2": 3.633565664291382, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -132.10267639160156, "logps_train/policy_1_l": -78.21939086914062, "logps_train/policy_1_w": -62.202083587646484, "logps_train/policy_2_2": -104.43519592285156, "logps_train/policy_2_w": -91.21247863769531, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -77.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -90.5, "rewards_train/1-2": -0.12227903306484222, "rewards_train/1-l": -0.4993802011013031, "rewards_train/1-w": 1.4768620729446411, "rewards_train/2-2": 1.2058334350585938, "rewards_train/2-w": -0.09707774221897125, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.9762422740459442, "rewards_train/margins_1": 1.5991411060094833, "rewards_train/margins_2": 1.302911177277565, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -180.96319580078125, "logps_train/policy_1_l": -175.1016387939453, "logps_train/policy_1_w": -141.04022216796875, "logps_train/policy_2_2": -134.21035766601562, "logps_train/policy_2_w": -205.61912536621094, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.20882084965705872, "rewards_train/1-l": -1.3461624383926392, "rewards_train/1-w": 1.6721489429473877, "rewards_train/2-2": 2.2265238761901855, "rewards_train/2-w": -1.4583969116210938, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.018311381340027, "rewards_train/margins_1": 1.8809697926044464, "rewards_train/margins_2": 3.6849207878112793, "step": 74 }, { "epoch": 0.22, "logps_train/policy_1_2": -171.61534118652344, "logps_train/policy_1_l": -158.8712921142578, "logps_train/policy_1_w": -113.12042236328125, "logps_train/policy_2_2": -123.18321228027344, "logps_train/policy_2_w": -154.4344024658203, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.29630059003829956, "rewards_train/1-l": -1.5610787868499756, "rewards_train/1-w": 1.9242863655090332, "rewards_train/2-2": 2.2437877655029297, "rewards_train/2-w": -0.19929926097393036, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.485365152359009, "rewards_train/margins_1": 2.2205869555473328, "rewards_train/margins_2": 2.44308702647686, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -187.0214385986328, "logps_train/policy_1_l": -253.1811065673828, "logps_train/policy_1_w": -162.827392578125, "logps_train/policy_2_2": -151.5604248046875, "logps_train/policy_2_w": -208.00155639648438, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -0.14394080638885498, "rewards_train/1-l": -1.573580026626587, "rewards_train/1-w": 1.736011266708374, "rewards_train/2-2": 1.6377084255218506, "rewards_train/2-w": -0.34312501549720764, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.309591293334961, "rewards_train/margins_1": 1.879952073097229, "rewards_train/margins_2": 1.9808334410190582, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -149.97491455078125, "logps_train/policy_1_l": -146.20330810546875, "logps_train/policy_1_w": -162.63291931152344, "logps_train/policy_2_2": -112.07963562011719, "logps_train/policy_2_w": -218.10018920898438, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": -0.7562060356140137, "rewards_train/1-l": -0.9154484272003174, "rewards_train/1-w": 1.6878798007965088, "rewards_train/2-2": 0.9461872577667236, "rewards_train/2-w": -0.9371672868728638, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.603328227996826, "rewards_train/margins_1": 2.4440858364105225, "rewards_train/margins_2": 1.8833545446395874, "step": 75 }, { "epoch": 0.22, "logps_train/policy_1_2": -166.0063934326172, "logps_train/policy_1_l": -128.48268127441406, "logps_train/policy_1_w": -138.35720825195312, "logps_train/policy_2_2": -125.1357192993164, "logps_train/policy_2_w": -171.18832397460938, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.197612002491951, "rewards_train/1-l": -0.9435811638832092, "rewards_train/1-w": 1.5334203243255615, "rewards_train/2-2": 1.7883814573287964, "rewards_train/2-w": -0.1278170347213745, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4770014882087708, "rewards_train/margins_1": 1.7310323268175125, "rewards_train/margins_2": 1.916198492050171, "step": 75 }, { "epoch": 0.23, "learning_rate": 4.9463912307487605e-06, "loss": 1.2796, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -138.10885620117188, "logps_train/policy_1_l": -138.999755859375, "logps_train/policy_1_w": -114.91349792480469, "logps_train/policy_2_2": -99.56175231933594, "logps_train/policy_2_w": -172.2164764404297, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.3561983108520508, "rewards_train/1-l": -0.9763417840003967, "rewards_train/1-w": 1.6797442436218262, "rewards_train/2-2": 1.3754653930664062, "rewards_train/2-w": -1.2138354778289795, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.656086027622223, "rewards_train/margins_1": 2.035942554473877, "rewards_train/margins_2": 2.5893008708953857, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -213.43899536132812, "logps_train/policy_1_l": -185.74978637695312, "logps_train/policy_1_w": -142.8986053466797, "logps_train/policy_2_2": -169.66595458984375, "logps_train/policy_2_w": -181.75064086914062, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.2890174090862274, "rewards_train/1-l": -1.4892362356185913, "rewards_train/1-w": 1.9804518222808838, "rewards_train/2-2": 2.04902982711792, "rewards_train/2-w": 0.13235867023468018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.469688057899475, "rewards_train/margins_1": 2.269469231367111, "rewards_train/margins_2": 1.9166711568832397, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -115.21588897705078, "logps_train/policy_1_l": -56.770259857177734, "logps_train/policy_1_w": -76.97164154052734, "logps_train/policy_2_2": -87.65402221679688, "logps_train/policy_2_w": -99.58219909667969, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -54.25, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": 0.3386649489402771, "rewards_train/1-l": -0.23825649917125702, "rewards_train/1-w": 0.8385779857635498, "rewards_train/2-2": 1.4402620792388916, "rewards_train/2-w": -0.20294734835624695, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 1.0768344849348068, "rewards_train/margins_1": 0.4999130368232727, "rewards_train/margins_2": 1.6432094275951385, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -166.5389404296875, "logps_train/policy_1_l": -154.31735229492188, "logps_train/policy_1_w": -115.08802795410156, "logps_train/policy_2_2": -119.48236083984375, "logps_train/policy_2_w": -156.61337280273438, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.5919787883758545, "rewards_train/1-l": -1.786716103553772, "rewards_train/1-w": 1.6388530731201172, "rewards_train/2-2": 1.6287181377410889, "rewards_train/2-w": -0.8834090232849121, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.425569176673889, "rewards_train/margins_1": 2.2308318614959717, "rewards_train/margins_2": 2.512127161026001, "step": 76 }, { "epoch": 0.23, "logps_train/policy_1_2": -121.53369140625, "logps_train/policy_1_l": -109.29693603515625, "logps_train/policy_1_w": -99.74832916259766, "logps_train/policy_2_2": -82.81511688232422, "logps_train/policy_2_w": -141.09658813476562, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -96.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.42172789573669434, "rewards_train/1-l": -1.0189030170440674, "rewards_train/1-w": 1.4477254152297974, "rewards_train/2-2": 1.3798162937164307, "rewards_train/2-w": -0.7358306646347046, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4666284322738647, "rewards_train/margins_1": 1.8694533109664917, "rewards_train/margins_2": 2.1156469583511353, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -131.43853759765625, "logps_train/policy_1_l": -111.29678344726562, "logps_train/policy_1_w": -101.78057861328125, "logps_train/policy_2_2": -94.57111358642578, "logps_train/policy_2_w": -147.9449462890625, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -0.5520565509796143, "rewards_train/1-l": -0.7753806710243225, "rewards_train/1-w": 0.9414979219436646, "rewards_train/2-2": 1.2850761413574219, "rewards_train/2-w": -1.0321900844573975, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.716878592967987, "rewards_train/margins_1": 1.4935544729232788, "rewards_train/margins_2": 2.3172662258148193, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -194.771240234375, "logps_train/policy_1_l": -210.25503540039062, "logps_train/policy_1_w": -168.315185546875, "logps_train/policy_2_2": -143.93115234375, "logps_train/policy_2_w": -233.23423767089844, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -0.5704845190048218, "rewards_train/1-l": -2.3661279678344727, "rewards_train/1-w": 3.033324718475342, "rewards_train/2-2": 2.3545403480529785, "rewards_train/2-w": -0.7468608617782593, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.3994526863098145, "rewards_train/margins_1": 3.6038092374801636, "rewards_train/margins_2": 3.101401209831238, "step": 77 }, { "epoch": 0.23, "logps_train/policy_1_2": -177.05581665039062, "logps_train/policy_1_l": -149.84182739257812, "logps_train/policy_1_w": -169.14906311035156, "logps_train/policy_2_2": -125.91703796386719, "logps_train/policy_2_w": -219.59141540527344, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -0.7978665232658386, "rewards_train/1-l": -0.5438516139984131, "rewards_train/1-w": 2.2762069702148438, "rewards_train/2-2": 1.6622028350830078, "rewards_train/2-w": -0.07979540526866913, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.820058584213257, "rewards_train/margins_1": 3.0740734934806824, "rewards_train/margins_2": 1.741998240351677, "step": 77 }, { "epoch": 0.23, "learning_rate": 4.941184660417034e-06, "loss": 1.3429, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -147.7075958251953, "logps_train/policy_1_l": -150.601318359375, "logps_train/policy_1_w": -105.39152526855469, "logps_train/policy_2_2": -110.92211151123047, "logps_train/policy_2_w": -139.52703857421875, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.4146070182323456, "rewards_train/1-l": -1.5651127099990845, "rewards_train/1-w": 1.253035068511963, "rewards_train/2-2": 1.306031346321106, "rewards_train/2-w": -0.5321975946426392, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.8181477785110474, "rewards_train/margins_1": 1.6676420867443085, "rewards_train/margins_2": 1.8382289409637451, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -158.22933959960938, "logps_train/policy_1_l": -146.62457275390625, "logps_train/policy_1_w": -99.97801971435547, "logps_train/policy_2_2": -111.45191192626953, "logps_train/policy_2_w": -138.43588256835938, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -0.596370279788971, "rewards_train/1-l": -1.0268497467041016, "rewards_train/1-w": 1.508838415145874, "rewards_train/2-2": 1.6259034872055054, "rewards_train/2-w": -0.3201513886451721, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.5356881618499756, "rewards_train/margins_1": 2.105208694934845, "rewards_train/margins_2": 1.9460548758506775, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -191.576416015625, "logps_train/policy_1_l": -139.15130615234375, "logps_train/policy_1_w": -123.1439208984375, "logps_train/policy_2_2": -146.13247680664062, "logps_train/policy_2_w": -165.14390563964844, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.9158443212509155, "rewards_train/1-l": -0.4625912606716156, "rewards_train/1-w": 1.331896424293518, "rewards_train/2-2": 1.6627644300460815, "rewards_train/2-w": -0.6807973384857178, "rewards_train/accuracies": 0.6875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 1.7944876849651337, "rewards_train/margins_1": 2.2477407455444336, "rewards_train/margins_2": 2.3435617685317993, "step": 78 }, { "epoch": 0.23, "logps_train/policy_1_2": -188.84625244140625, "logps_train/policy_1_l": -191.09414672851562, "logps_train/policy_1_w": -138.68179321289062, "logps_train/policy_2_2": -144.2991485595703, "logps_train/policy_2_w": -180.56597900390625, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.20181229710578918, "rewards_train/1-l": -1.293204665184021, "rewards_train/1-w": 1.5736184120178223, "rewards_train/2-2": 2.216374158859253, "rewards_train/2-w": -0.6620664596557617, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.8668230772018433, "rewards_train/margins_1": 1.7754307091236115, "rewards_train/margins_2": 2.8784406185150146, "step": 78 }, { "epoch": 0.24, "logps_train/policy_1_2": -137.792724609375, "logps_train/policy_1_l": -133.99002075195312, "logps_train/policy_1_w": -95.09365844726562, "logps_train/policy_2_2": -99.5943832397461, "logps_train/policy_2_w": -136.19219970703125, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -0.9523189067840576, "rewards_train/1-l": -0.844900369644165, "rewards_train/1-w": 1.0246179103851318, "rewards_train/2-2": 1.115170955657959, "rewards_train/2-w": -0.9063289761543274, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 1.8695182800292969, "rewards_train/margins_1": 1.9769368171691895, "rewards_train/margins_2": 2.0214999318122864, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -145.8628692626953, "logps_train/policy_1_l": -89.94861602783203, "logps_train/policy_1_w": -95.00042724609375, "logps_train/policy_2_2": -110.98179626464844, "logps_train/policy_2_w": -133.74850463867188, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": 0.0008218139410018921, "rewards_train/1-l": -0.9154736399650574, "rewards_train/1-w": 1.9024968147277832, "rewards_train/2-2": 1.8428356647491455, "rewards_train/2-w": 0.07016822695732117, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.8179704546928406, "rewards_train/margins_1": 1.9016750007867813, "rewards_train/margins_2": 1.7726674377918243, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -181.92263793945312, "logps_train/policy_1_l": -197.2379150390625, "logps_train/policy_1_w": -135.17759704589844, "logps_train/policy_2_2": -118.26945495605469, "logps_train/policy_2_w": -191.4159698486328, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.9511510133743286, "rewards_train/1-l": -0.7929794192314148, "rewards_train/1-w": 1.813197135925293, "rewards_train/2-2": 2.2021069526672363, "rewards_train/2-w": -0.843063235282898, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.6061765551567078, "rewards_train/margins_1": 2.7643481492996216, "rewards_train/margins_2": 3.0451701879501343, "step": 79 }, { "epoch": 0.24, "logps_train/policy_1_2": -280.40118408203125, "logps_train/policy_1_l": -292.9780578613281, "logps_train/policy_1_w": -176.35784912109375, "logps_train/policy_2_2": -209.34251403808594, "logps_train/policy_2_w": -228.3603973388672, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -264.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.2659013271331787, "rewards_train/1-l": -2.8218774795532227, "rewards_train/1-w": 2.3503482341766357, "rewards_train/2-2": 2.6188743114471436, "rewards_train/2-w": -0.2522510886192322, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.172225713729858, "rewards_train/margins_1": 3.6162495613098145, "rewards_train/margins_2": 2.8711254000663757, "step": 79 }, { "epoch": 0.24, "learning_rate": 4.935739834765994e-06, "loss": 1.3196, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -196.22329711914062, "logps_train/policy_1_l": -153.23175048828125, "logps_train/policy_1_w": -160.3697052001953, "logps_train/policy_2_2": -154.95404052734375, "logps_train/policy_2_w": -245.5870361328125, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": 0.29173150658607483, "rewards_train/1-l": -1.0520803928375244, "rewards_train/1-w": 3.3755292892456055, "rewards_train/2-2": 2.2061591148376465, "rewards_train/2-w": -1.377454161643982, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.42760968208313, "rewards_train/margins_1": 3.0837977826595306, "rewards_train/margins_2": 3.5836132764816284, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -174.84115600585938, "logps_train/policy_1_l": -168.38961791992188, "logps_train/policy_1_w": -158.93722534179688, "logps_train/policy_2_2": -126.90766906738281, "logps_train/policy_2_w": -213.29823303222656, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -0.7473965883255005, "rewards_train/1-l": -1.4177933931350708, "rewards_train/1-w": 2.141435146331787, "rewards_train/2-2": 1.7096233367919922, "rewards_train/2-w": -0.7364634275436401, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.559228539466858, "rewards_train/margins_1": 2.8888317346572876, "rewards_train/margins_2": 2.4460867643356323, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -123.66658782958984, "logps_train/policy_1_l": -136.037841796875, "logps_train/policy_1_w": -139.3433837890625, "logps_train/policy_2_2": -80.41476440429688, "logps_train/policy_2_w": -198.1904296875, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.5994712114334106, "rewards_train/1-l": -1.5352296829223633, "rewards_train/1-w": 1.7670297622680664, "rewards_train/2-2": 1.2753210067749023, "rewards_train/2-w": -0.8229501843452454, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.3022594451904297, "rewards_train/margins_1": 2.366500973701477, "rewards_train/margins_2": 2.0982711911201477, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -216.48255920410156, "logps_train/policy_1_l": -219.68887329101562, "logps_train/policy_1_w": -179.45169067382812, "logps_train/policy_2_2": -158.47698974609375, "logps_train/policy_2_w": -252.37060546875, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": -1.1420066356658936, "rewards_train/1-l": -1.9183011054992676, "rewards_train/1-w": 2.253268241882324, "rewards_train/2-2": 2.0546460151672363, "rewards_train/2-w": -1.255810260772705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.171569347381592, "rewards_train/margins_1": 3.3952748775482178, "rewards_train/margins_2": 3.3104562759399414, "step": 80 }, { "epoch": 0.24, "logps_train/policy_1_2": -144.841552734375, "logps_train/policy_1_l": -149.14413452148438, "logps_train/policy_1_w": -130.70864868164062, "logps_train/policy_2_2": -103.88997650146484, "logps_train/policy_2_w": -183.71981811523438, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.5304445624351501, "rewards_train/1-l": -1.2139246463775635, "rewards_train/1-w": 1.9471049308776855, "rewards_train/2-2": 1.6324868202209473, "rewards_train/2-w": -0.7313558459281921, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.161029577255249, "rewards_train/margins_1": 2.4775494933128357, "rewards_train/margins_2": 2.3638426661491394, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -148.22418212890625, "logps_train/policy_1_l": -161.3728790283203, "logps_train/policy_1_w": -140.08767700195312, "logps_train/policy_2_2": -100.13864135742188, "logps_train/policy_2_w": -211.57821655273438, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.1506414413452148, "rewards_train/1-l": -1.615217685699463, "rewards_train/1-w": 1.9354708194732666, "rewards_train/2-2": 1.1569368839263916, "rewards_train/2-w": -2.169931650161743, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5506885051727295, "rewards_train/margins_1": 3.0861122608184814, "rewards_train/margins_2": 3.3268685340881348, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -161.2213592529297, "logps_train/policy_1_l": -151.3973388671875, "logps_train/policy_1_w": -135.8773193359375, "logps_train/policy_2_2": -123.92784881591797, "logps_train/policy_2_w": -188.4949951171875, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -0.7377610206604004, "rewards_train/1-l": -1.7070183753967285, "rewards_train/1-w": 2.0587525367736816, "rewards_train/2-2": 1.3810430765151978, "rewards_train/2-w": -0.44637438654899597, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.76577091217041, "rewards_train/margins_1": 2.796513557434082, "rewards_train/margins_2": 1.8274174630641937, "step": 81 }, { "epoch": 0.24, "logps_train/policy_1_2": -262.17437744140625, "logps_train/policy_1_l": -196.80032348632812, "logps_train/policy_1_w": -172.59320068359375, "logps_train/policy_2_2": -188.78753662109375, "logps_train/policy_2_w": -242.657958984375, "logps_train/ref_1_2": -249.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -1.313140869140625, "rewards_train/1-l": -1.1917507648468018, "rewards_train/1-w": 2.3656797409057617, "rewards_train/2-2": 2.68062162399292, "rewards_train/2-w": -1.3634519577026367, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.5574305057525635, "rewards_train/margins_1": 3.6788206100463867, "rewards_train/margins_2": 4.044073581695557, "step": 81 }, { "epoch": 0.25, "learning_rate": 4.930057285201028e-06, "loss": 1.1044, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -119.32775115966797, "logps_train/policy_1_l": -98.06360626220703, "logps_train/policy_1_w": -74.3897933959961, "logps_train/policy_2_2": -87.72709655761719, "logps_train/policy_2_w": -101.94805908203125, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": -0.07491394877433777, "rewards_train/1-l": -0.9032353758811951, "rewards_train/1-w": 0.9778172969818115, "rewards_train/2-2": 1.683539867401123, "rewards_train/2-w": -0.4237856864929199, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.8810526728630066, "rewards_train/margins_1": 1.0527312457561493, "rewards_train/margins_2": 2.107325553894043, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -227.32701110839844, "logps_train/policy_1_l": -211.84121704101562, "logps_train/policy_1_w": -127.47260284423828, "logps_train/policy_2_2": -159.11239624023438, "logps_train/policy_2_w": -181.80764770507812, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.4073107242584229, "rewards_train/1-l": -1.832755208015442, "rewards_train/1-w": 1.2577685117721558, "rewards_train/2-2": 2.260146141052246, "rewards_train/2-w": -1.3709019422531128, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0905237197875977, "rewards_train/margins_1": 2.6650792360305786, "rewards_train/margins_2": 3.631048083305359, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -177.62933349609375, "logps_train/policy_1_l": -198.35903930664062, "logps_train/policy_1_w": -134.287109375, "logps_train/policy_2_2": -134.6147003173828, "logps_train/policy_2_w": -183.86231994628906, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.20531566441059113, "rewards_train/1-l": -2.5263333320617676, "rewards_train/1-w": 1.8115243911743164, "rewards_train/2-2": 1.9547406435012817, "rewards_train/2-w": -0.9249052405357361, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.337857723236084, "rewards_train/margins_1": 2.0168400555849075, "rewards_train/margins_2": 2.879645884037018, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -168.68148803710938, "logps_train/policy_1_l": -181.642333984375, "logps_train/policy_1_w": -153.3468017578125, "logps_train/policy_2_2": -132.48361206054688, "logps_train/policy_2_w": -196.665771484375, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.02439950779080391, "rewards_train/1-l": -2.31052303314209, "rewards_train/1-w": 2.0903210639953613, "rewards_train/2-2": 1.725028157234192, "rewards_train/2-w": -0.5021730065345764, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.400844097137451, "rewards_train/margins_1": 2.1147205717861652, "rewards_train/margins_2": 2.2272011637687683, "step": 82 }, { "epoch": 0.25, "logps_train/policy_1_2": -234.82510375976562, "logps_train/policy_1_l": -157.8840789794922, "logps_train/policy_1_w": -124.53373718261719, "logps_train/policy_2_2": -157.9154815673828, "logps_train/policy_2_w": -179.30438232421875, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.6231343746185303, "rewards_train/1-l": -1.373143196105957, "rewards_train/1-w": 1.7524852752685547, "rewards_train/2-2": 2.553372859954834, "rewards_train/2-w": -0.9496270418167114, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.1256284713745117, "rewards_train/margins_1": 3.375619649887085, "rewards_train/margins_2": 3.5029999017715454, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -118.97633361816406, "logps_train/policy_1_l": -118.06320190429688, "logps_train/policy_1_w": -91.20629119873047, "logps_train/policy_2_2": -82.48365020751953, "logps_train/policy_2_w": -134.85081481933594, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -0.3622811734676361, "rewards_train/1-l": -1.1915984153747559, "rewards_train/1-w": 1.4802011251449585, "rewards_train/2-2": 1.5649158954620361, "rewards_train/2-w": -0.5087147951126099, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.6717995405197144, "rewards_train/margins_1": 1.8424822986125946, "rewards_train/margins_2": 2.073630690574646, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -182.216064453125, "logps_train/policy_1_l": -128.68862915039062, "logps_train/policy_1_w": -85.33229064941406, "logps_train/policy_2_2": -123.3964614868164, "logps_train/policy_2_w": -127.42985534667969, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": -1.5188730955123901, "rewards_train/1-l": -1.663198709487915, "rewards_train/1-w": 1.409142017364502, "rewards_train/2-2": 1.6541032791137695, "rewards_train/2-w": -0.9825115203857422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.072340726852417, "rewards_train/margins_1": 2.928015112876892, "rewards_train/margins_2": 2.6366147994995117, "step": 83 }, { "epoch": 0.25, "logps_train/policy_1_2": -191.3058624267578, "logps_train/policy_1_l": -201.5543670654297, "logps_train/policy_1_w": -144.59759521484375, "logps_train/policy_2_2": -159.96871948242188, "logps_train/policy_2_w": -189.50843811035156, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.005586668848991394, "rewards_train/1-l": -2.097623825073242, "rewards_train/1-w": 2.2960991859436035, "rewards_train/2-2": 1.7699249982833862, "rewards_train/2-w": 0.09446855634450912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.393723011016846, "rewards_train/margins_1": 2.301685854792595, "rewards_train/margins_2": 1.675456441938877, "step": 83 }, { "epoch": 0.25, "learning_rate": 4.924137566328951e-06, "loss": 1.2067, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -139.3340301513672, "logps_train/policy_1_l": -120.11030578613281, "logps_train/policy_1_w": -142.1666259765625, "logps_train/policy_2_2": -112.56582641601562, "logps_train/policy_2_w": -180.8130645751953, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -0.14785680174827576, "rewards_train/1-l": -1.3436965942382812, "rewards_train/1-w": 1.8040411472320557, "rewards_train/2-2": 1.2850192785263062, "rewards_train/2-w": -0.21060335636138916, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.147737741470337, "rewards_train/margins_1": 1.9518979489803314, "rewards_train/margins_2": 1.4956226348876953, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -116.43209838867188, "logps_train/policy_1_l": -150.50479125976562, "logps_train/policy_1_w": -101.85563659667969, "logps_train/policy_2_2": -85.84335327148438, "logps_train/policy_2_w": -139.61708068847656, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -0.8701627254486084, "rewards_train/1-l": -1.5697171688079834, "rewards_train/1-w": 1.1063063144683838, "rewards_train/2-2": 0.8031652569770813, "rewards_train/2-w": -0.8901267051696777, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.676023483276367, "rewards_train/margins_1": 1.9764690399169922, "rewards_train/margins_2": 1.693291962146759, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -219.37344360351562, "logps_train/policy_1_l": -204.4417724609375, "logps_train/policy_1_w": -149.57635498046875, "logps_train/policy_2_2": -148.97340393066406, "logps_train/policy_2_w": -225.3871307373047, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.5955469608306885, "rewards_train/1-l": -1.8237659931182861, "rewards_train/1-w": 2.1122851371765137, "rewards_train/2-2": 2.0969951152801514, "rewards_train/2-w": -2.052581310272217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9360511302948, "rewards_train/margins_1": 3.707832098007202, "rewards_train/margins_2": 4.149576425552368, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -245.0836181640625, "logps_train/policy_1_l": -252.15325927734375, "logps_train/policy_1_w": -135.40664672851562, "logps_train/policy_2_2": -163.70211791992188, "logps_train/policy_2_w": -201.3202362060547, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -225.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.786487340927124, "rewards_train/1-l": -2.6758720874786377, "rewards_train/1-w": 2.1804280281066895, "rewards_train/2-2": 2.4901413917541504, "rewards_train/2-w": -1.3195226192474365, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.856300115585327, "rewards_train/margins_1": 3.9669153690338135, "rewards_train/margins_2": 3.809664011001587, "step": 84 }, { "epoch": 0.25, "logps_train/policy_1_2": -167.15423583984375, "logps_train/policy_1_l": -146.03988647460938, "logps_train/policy_1_w": -102.58920288085938, "logps_train/policy_2_2": -118.108642578125, "logps_train/policy_2_w": -161.63540649414062, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -0.6802677512168884, "rewards_train/1-l": -1.3387537002563477, "rewards_train/1-w": 1.5694000720977783, "rewards_train/2-2": 1.9566895961761475, "rewards_train/2-w": -1.6631513833999634, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.908153772354126, "rewards_train/margins_1": 2.2496678233146667, "rewards_train/margins_2": 3.619840979576111, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -136.79379272460938, "logps_train/policy_1_l": -117.58647155761719, "logps_train/policy_1_w": -97.89878845214844, "logps_train/policy_2_2": -104.63643646240234, "logps_train/policy_2_w": -131.44735717773438, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -0.25574666261672974, "rewards_train/1-l": -1.23293936252594, "rewards_train/1-w": 1.474623203277588, "rewards_train/2-2": 1.264139175415039, "rewards_train/2-w": -0.4332619309425354, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.707562565803528, "rewards_train/margins_1": 1.7303698658943176, "rewards_train/margins_2": 1.6974011063575745, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -234.10806274414062, "logps_train/policy_1_l": -226.59054565429688, "logps_train/policy_1_w": -152.89215087890625, "logps_train/policy_2_2": -169.09014892578125, "logps_train/policy_2_w": -219.6324462890625, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.209542989730835, "rewards_train/1-l": -2.4671645164489746, "rewards_train/1-w": 2.493597984313965, "rewards_train/2-2": 2.30942440032959, "rewards_train/2-w": -0.5491834878921509, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9607625007629395, "rewards_train/margins_1": 3.7031409740448, "rewards_train/margins_2": 2.8586078882217407, "step": 85 }, { "epoch": 0.25, "logps_train/policy_1_2": -153.9658660888672, "logps_train/policy_1_l": -160.34344482421875, "logps_train/policy_1_w": -120.14942932128906, "logps_train/policy_2_2": -111.15567016601562, "logps_train/policy_2_w": -166.58895874023438, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -0.4637737274169922, "rewards_train/1-l": -1.5843435525894165, "rewards_train/1-w": 1.7241194248199463, "rewards_train/2-2": 1.6680269241333008, "rewards_train/2-w": -0.6299911141395569, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.308462977409363, "rewards_train/margins_1": 2.1878931522369385, "rewards_train/margins_2": 2.2980180382728577, "step": 85 }, { "epoch": 0.26, "learning_rate": 4.917981255903893e-06, "loss": 1.2084, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -136.17083740234375, "logps_train/policy_1_l": -123.09893035888672, "logps_train/policy_1_w": -90.74862670898438, "logps_train/policy_2_2": -87.83726501464844, "logps_train/policy_2_w": -132.8692169189453, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -0.5038039684295654, "rewards_train/1-l": -1.1782797574996948, "rewards_train/1-w": 1.5268672704696655, "rewards_train/2-2": 1.800550937652588, "rewards_train/2-w": -0.7307212948799133, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.7051470279693604, "rewards_train/margins_1": 2.030671238899231, "rewards_train/margins_2": 2.531272232532501, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -117.96859741210938, "logps_train/policy_1_l": -167.24200439453125, "logps_train/policy_1_w": -102.5578384399414, "logps_train/policy_2_2": -89.0810546875, "logps_train/policy_2_w": -134.82774353027344, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -0.02713298797607422, "rewards_train/1-l": -1.9244457483291626, "rewards_train/1-w": 1.2570576667785645, "rewards_train/2-2": 1.482715129852295, "rewards_train/2-w": -0.3175400495529175, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.181503415107727, "rewards_train/margins_1": 1.2841906547546387, "rewards_train/margins_2": 1.8002551794052124, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -225.97630310058594, "logps_train/policy_1_l": -204.2420196533203, "logps_train/policy_1_w": -162.36370849609375, "logps_train/policy_2_2": -163.77450561523438, "logps_train/policy_2_w": -213.5357208251953, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": -1.8291726112365723, "rewards_train/1-l": -1.4708822965621948, "rewards_train/1-w": 1.205816626548767, "rewards_train/2-2": 1.6066304445266724, "rewards_train/2-w": -1.2645082473754883, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.676698923110962, "rewards_train/margins_1": 3.0349892377853394, "rewards_train/margins_2": 2.8711386919021606, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -188.0623321533203, "logps_train/policy_1_l": -204.41319274902344, "logps_train/policy_1_w": -128.3582000732422, "logps_train/policy_2_2": -140.60292053222656, "logps_train/policy_2_w": -180.26202392578125, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.713166356086731, "rewards_train/1-l": -2.110753297805786, "rewards_train/1-w": 1.8450391292572021, "rewards_train/2-2": 1.9812123775482178, "rewards_train/2-w": -0.5965150594711304, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9557924270629883, "rewards_train/margins_1": 2.558205485343933, "rewards_train/margins_2": 2.577727437019348, "step": 86 }, { "epoch": 0.26, "logps_train/policy_1_2": -184.0747833251953, "logps_train/policy_1_l": -166.48963928222656, "logps_train/policy_1_w": -144.24005126953125, "logps_train/policy_2_2": -137.54440307617188, "logps_train/policy_2_w": -210.35214233398438, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -0.7066981792449951, "rewards_train/1-l": -1.4294331073760986, "rewards_train/1-w": 2.2924020290374756, "rewards_train/2-2": 1.7744660377502441, "rewards_train/2-w": -1.3711525201797485, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.721835136413574, "rewards_train/margins_1": 2.9991002082824707, "rewards_train/margins_2": 3.1456185579299927, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -79.65556335449219, "logps_train/policy_1_l": -95.94639587402344, "logps_train/policy_1_w": -58.871360778808594, "logps_train/policy_2_2": -57.07548522949219, "logps_train/policy_2_w": -83.56466674804688, "logps_train/ref_1_2": -74.0, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -62.25, "logps_train/ref_2_w": -79.5, "rewards_train/1-2": -0.5659465193748474, "rewards_train/1-l": -0.8089892864227295, "rewards_train/1-w": 0.8284645080566406, "rewards_train/2-2": 0.5160840749740601, "rewards_train/2-w": -0.4069555103778839, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 1.6374537944793701, "rewards_train/margins_1": 1.394411027431488, "rewards_train/margins_2": 0.923039585351944, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -191.3046875, "logps_train/policy_1_l": -163.7010498046875, "logps_train/policy_1_w": -141.97930908203125, "logps_train/policy_2_2": -134.9788818359375, "logps_train/policy_2_w": -213.39404296875, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -0.9634765982627869, "rewards_train/1-l": -1.765221118927002, "rewards_train/1-w": 2.3005552291870117, "rewards_train/2-2": 1.9088501930236816, "rewards_train/2-w": -1.680395245552063, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.065776348114014, "rewards_train/margins_1": 3.2640318274497986, "rewards_train/margins_2": 3.5892454385757446, "step": 87 }, { "epoch": 0.26, "logps_train/policy_1_2": -224.1290283203125, "logps_train/policy_1_l": -165.67413330078125, "logps_train/policy_1_w": -182.36471557617188, "logps_train/policy_2_2": -168.68313598632812, "logps_train/policy_2_w": -244.09381103515625, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": -0.9464952945709229, "rewards_train/1-l": -1.1676080226898193, "rewards_train/1-w": 1.8088405132293701, "rewards_train/2-2": 2.2574687004089355, "rewards_train/2-w": -1.9109435081481934, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.9764485359191895, "rewards_train/margins_1": 2.755335807800293, "rewards_train/margins_2": 4.168412208557129, "step": 87 }, { "epoch": 0.26, "learning_rate": 4.9115889547708975e-06, "loss": 1.3056, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -175.11361694335938, "logps_train/policy_1_l": -124.17877197265625, "logps_train/policy_1_w": -121.83914184570312, "logps_train/policy_2_2": -136.62078857421875, "logps_train/policy_2_w": -162.23777770996094, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -0.3592124283313751, "rewards_train/1-l": -1.037018060684204, "rewards_train/1-w": 1.8106175661087036, "rewards_train/2-2": 1.7408514022827148, "rewards_train/2-w": -0.6729963421821594, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.8476356267929077, "rewards_train/margins_1": 2.1698299944400787, "rewards_train/margins_2": 2.4138477444648743, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -147.26393127441406, "logps_train/policy_1_l": -106.105712890625, "logps_train/policy_1_w": -120.14488983154297, "logps_train/policy_2_2": -104.592041015625, "logps_train/policy_2_w": -169.40646362304688, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.8783470392227173, "rewards_train/1-l": -0.8667240142822266, "rewards_train/1-w": 1.8211069107055664, "rewards_train/2-2": 1.2068121433258057, "rewards_train/2-w": -0.7359592914581299, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.687830924987793, "rewards_train/margins_1": 2.6994539499282837, "rewards_train/margins_2": 1.9427714347839355, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -150.90560913085938, "logps_train/policy_1_l": -106.15719604492188, "logps_train/policy_1_w": -116.74076080322266, "logps_train/policy_2_2": -106.17765808105469, "logps_train/policy_2_w": -174.78619384765625, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.9923197031021118, "rewards_train/1-l": -1.011716365814209, "rewards_train/1-w": 1.855416178703308, "rewards_train/2-2": 1.2139848470687866, "rewards_train/2-w": -1.2503851652145386, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.867132544517517, "rewards_train/margins_1": 2.84773588180542, "rewards_train/margins_2": 2.464370012283325, "step": 88 }, { "epoch": 0.26, "logps_train/policy_1_2": -106.596923828125, "logps_train/policy_1_l": -127.02590942382812, "logps_train/policy_1_w": -78.84920501708984, "logps_train/policy_2_2": -74.34516906738281, "logps_train/policy_2_w": -122.61243438720703, "logps_train/ref_1_2": -100.5, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": -0.6089114546775818, "rewards_train/1-l": -1.4669466018676758, "rewards_train/1-w": 1.2861733436584473, "rewards_train/2-2": 1.0377490520477295, "rewards_train/2-w": -0.9829235076904297, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.753119945526123, "rewards_train/margins_1": 1.895084798336029, "rewards_train/margins_2": 2.020672559738159, "step": 88 }, { "epoch": 0.27, "logps_train/policy_1_2": -132.85731506347656, "logps_train/policy_1_l": -164.01138305664062, "logps_train/policy_1_w": -125.70018005371094, "logps_train/policy_2_2": -98.1198501586914, "logps_train/policy_2_w": -164.50677490234375, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -0.6439348459243774, "rewards_train/1-l": -1.64371657371521, "rewards_train/1-w": 1.9823259115219116, "rewards_train/2-2": 1.1622340679168701, "rewards_train/2-w": -0.17177224159240723, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.6260424852371216, "rewards_train/margins_1": 2.626260757446289, "rewards_train/margins_2": 1.3340063095092773, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -103.07176971435547, "logps_train/policy_1_l": -131.099365234375, "logps_train/policy_1_w": -84.05205535888672, "logps_train/policy_2_2": -76.03756713867188, "logps_train/policy_2_w": -117.61902618408203, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -88.5, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": -0.3481932282447815, "rewards_train/1-l": -1.1467527151107788, "rewards_train/1-w": 1.0955761671066284, "rewards_train/2-2": 1.2379915714263916, "rewards_train/2-w": -0.7663946151733398, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.2423288822174072, "rewards_train/margins_1": 1.44376939535141, "rewards_train/margins_2": 2.0043861865997314, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -141.83274841308594, "logps_train/policy_1_l": -182.06112670898438, "logps_train/policy_1_w": -136.15049743652344, "logps_train/policy_2_2": -106.37138366699219, "logps_train/policy_2_w": -179.53228759765625, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.561009407043457, "rewards_train/1-l": -1.9618737697601318, "rewards_train/1-w": 1.9320582151412964, "rewards_train/2-2": 1.2081739902496338, "rewards_train/2-w": -0.4075269103050232, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8939319849014282, "rewards_train/margins_1": 2.4930676221847534, "rewards_train/margins_2": 1.615700900554657, "step": 89 }, { "epoch": 0.27, "logps_train/policy_1_2": -158.73626708984375, "logps_train/policy_1_l": -169.09921264648438, "logps_train/policy_1_w": -176.64808654785156, "logps_train/policy_2_2": -109.92255401611328, "logps_train/policy_2_w": -231.72509765625, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.8087825775146484, "rewards_train/1-l": -1.8342869281768799, "rewards_train/1-w": 2.304624557495117, "rewards_train/2-2": 2.0413382053375244, "rewards_train/2-w": -0.9422364234924316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.138911485671997, "rewards_train/margins_1": 3.1134071350097656, "rewards_train/margins_2": 2.983574628829956, "step": 89 }, { "epoch": 0.27, "learning_rate": 4.904961286807285e-06, "loss": 1.3251, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -185.7587432861328, "logps_train/policy_1_l": -166.75384521484375, "logps_train/policy_1_w": -140.65951538085938, "logps_train/policy_2_2": -129.27244567871094, "logps_train/policy_2_w": -208.1683807373047, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.2586864233016968, "rewards_train/1-l": -1.5357372760772705, "rewards_train/1-w": 2.0863914489746094, "rewards_train/2-2": 1.7897474765777588, "rewards_train/2-w": -1.6920337677001953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.62212872505188, "rewards_train/margins_1": 3.345077872276306, "rewards_train/margins_2": 3.481781244277954, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -170.88275146484375, "logps_train/policy_1_l": -85.35566711425781, "logps_train/policy_1_w": -64.27999114990234, "logps_train/policy_2_2": -115.72782897949219, "logps_train/policy_2_w": -96.3255386352539, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -75.5, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": -1.779289722442627, "rewards_train/1-l": -1.0072221755981445, "rewards_train/1-w": 0.7704383134841919, "rewards_train/2-2": 1.3891313076019287, "rewards_train/2-w": -0.6938823461532593, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.7776604890823364, "rewards_train/margins_1": 2.549728035926819, "rewards_train/margins_2": 2.083013653755188, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -194.7109375, "logps_train/policy_1_l": -203.8453369140625, "logps_train/policy_1_w": -142.39187622070312, "logps_train/policy_2_2": -146.3389892578125, "logps_train/policy_2_w": -187.7155303955078, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -0.934570848941803, "rewards_train/1-l": -2.2942018508911133, "rewards_train/1-w": 1.49342942237854, "rewards_train/2-2": 1.5043821334838867, "rewards_train/2-w": -0.8313186168670654, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.7876312732696533, "rewards_train/margins_1": 2.428000271320343, "rewards_train/margins_2": 2.335700750350952, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -131.74142456054688, "logps_train/policy_1_l": -130.4170684814453, "logps_train/policy_1_w": -97.95804595947266, "logps_train/policy_2_2": -94.8094482421875, "logps_train/policy_2_w": -132.52244567871094, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -0.7120331525802612, "rewards_train/1-l": -1.2221262454986572, "rewards_train/1-w": 1.4497523307800293, "rewards_train/2-2": 1.146594762802124, "rewards_train/2-w": -0.4894393980503082, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.6718785762786865, "rewards_train/margins_1": 2.1617854833602905, "rewards_train/margins_2": 1.6360341608524323, "step": 90 }, { "epoch": 0.27, "logps_train/policy_1_2": -147.44020080566406, "logps_train/policy_1_l": -131.00518798828125, "logps_train/policy_1_w": -104.91393280029297, "logps_train/policy_2_2": -106.31692504882812, "logps_train/policy_2_w": -156.4896240234375, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.19655901193618774, "rewards_train/1-l": -1.2008851766586304, "rewards_train/1-w": 1.8364388942718506, "rewards_train/2-2": 1.7229950428009033, "rewards_train/2-w": -0.886756420135498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.037324070930481, "rewards_train/margins_1": 2.0329979062080383, "rewards_train/margins_2": 2.6097514629364014, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -252.70025634765625, "logps_train/policy_1_l": -225.90167236328125, "logps_train/policy_1_w": -166.95533752441406, "logps_train/policy_2_2": -167.65675354003906, "logps_train/policy_2_w": -229.7021484375, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": -2.14815092086792, "rewards_train/1-l": -2.95979642868042, "rewards_train/1-w": 2.2966537475585938, "rewards_train/2-2": 2.883544445037842, "rewards_train/2-w": -0.6655272245407104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.256450176239014, "rewards_train/margins_1": 4.444804668426514, "rewards_train/margins_2": 3.5490716695785522, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -205.98968505859375, "logps_train/policy_1_l": -143.31271362304688, "logps_train/policy_1_w": -147.10081481933594, "logps_train/policy_2_2": -144.44313049316406, "logps_train/policy_2_w": -205.16143798828125, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.405560851097107, "rewards_train/1-l": -1.1422094106674194, "rewards_train/1-w": 1.9633557796478271, "rewards_train/2-2": 2.0995585918426514, "rewards_train/2-w": -1.4013015031814575, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.1055651903152466, "rewards_train/margins_1": 3.368916630744934, "rewards_train/margins_2": 3.500860095024109, "step": 91 }, { "epoch": 0.27, "logps_train/policy_1_2": -166.15863037109375, "logps_train/policy_1_l": -163.754150390625, "logps_train/policy_1_w": -125.65322875976562, "logps_train/policy_2_2": -126.0120849609375, "logps_train/policy_2_w": -168.22909545898438, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.0924251079559326, "rewards_train/1-l": -1.6943237781524658, "rewards_train/1-w": 1.388192892074585, "rewards_train/2-2": 1.3675419092178345, "rewards_train/2-w": -1.1336511373519897, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.082516670227051, "rewards_train/margins_1": 2.4806180000305176, "rewards_train/margins_2": 2.501193046569824, "step": 91 }, { "epoch": 0.28, "learning_rate": 4.898098898861766e-06, "loss": 1.1925, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -186.8197021484375, "logps_train/policy_1_l": -191.7509765625, "logps_train/policy_1_w": -152.28811645507812, "logps_train/policy_2_2": -136.56417846679688, "logps_train/policy_2_w": -205.68287658691406, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -0.938805878162384, "rewards_train/1-l": -1.8260741233825684, "rewards_train/1-w": 2.1586880683898926, "rewards_train/2-2": 1.9193644523620605, "rewards_train/2-w": -0.4018816351890564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.984762191772461, "rewards_train/margins_1": 3.0974939465522766, "rewards_train/margins_2": 2.321246087551117, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -160.76416015625, "logps_train/policy_1_l": -158.18812561035156, "logps_train/policy_1_w": -113.53631591796875, "logps_train/policy_2_2": -120.06267547607422, "logps_train/policy_2_w": -142.8209228515625, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.668993353843689, "rewards_train/1-l": -2.1413469314575195, "rewards_train/1-w": 1.0432441234588623, "rewards_train/2-2": 1.5804508924484253, "rewards_train/2-w": -0.3133425712585449, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.184591054916382, "rewards_train/margins_1": 1.7122374773025513, "rewards_train/margins_2": 1.8937934637069702, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -196.17425537109375, "logps_train/policy_1_l": -179.27435302734375, "logps_train/policy_1_w": -131.66720581054688, "logps_train/policy_2_2": -137.10284423828125, "logps_train/policy_2_w": -194.6790008544922, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.343206763267517, "rewards_train/1-l": -2.0486507415771484, "rewards_train/1-w": 2.1037864685058594, "rewards_train/2-2": 2.171355962753296, "rewards_train/2-w": -1.2620402574539185, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.152437210083008, "rewards_train/margins_1": 3.4469932317733765, "rewards_train/margins_2": 3.4333962202072144, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -175.1388397216797, "logps_train/policy_1_l": -201.4711151123047, "logps_train/policy_1_w": -160.64093017578125, "logps_train/policy_2_2": -127.74984741210938, "logps_train/policy_2_w": -211.13917541503906, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.218571662902832, "rewards_train/1-l": -2.0173254013061523, "rewards_train/1-w": 1.2210644483566284, "rewards_train/2-2": 1.3648592233657837, "rewards_train/2-w": -1.3170428276062012, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.2383898496627808, "rewards_train/margins_1": 2.4396361112594604, "rewards_train/margins_2": 2.681902050971985, "step": 92 }, { "epoch": 0.28, "logps_train/policy_1_2": -203.69485473632812, "logps_train/policy_1_l": -188.39842224121094, "logps_train/policy_1_w": -144.6802978515625, "logps_train/policy_2_2": -136.00344848632812, "logps_train/policy_2_w": -233.19424438476562, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -2.034132957458496, "rewards_train/1-l": -2.6319570541381836, "rewards_train/1-w": 2.249424457550049, "rewards_train/2-2": 2.0561013221740723, "rewards_train/2-w": -2.9081945419311523, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.881381511688232, "rewards_train/margins_1": 4.283557415008545, "rewards_train/margins_2": 4.964295864105225, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -246.3278350830078, "logps_train/policy_1_l": -218.133056640625, "logps_train/policy_1_w": -157.83035278320312, "logps_train/policy_2_2": -162.4868927001953, "logps_train/policy_2_w": -255.73391723632812, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": -2.044306755065918, "rewards_train/1-l": -1.795875072479248, "rewards_train/1-w": 1.9939182996749878, "rewards_train/2-2": 2.6374430656433105, "rewards_train/2-w": -2.821049451828003, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.789793372154236, "rewards_train/margins_1": 4.038225054740906, "rewards_train/margins_2": 5.4584925174713135, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -195.42578125, "logps_train/policy_1_l": -178.30715942382812, "logps_train/policy_1_w": -147.698974609375, "logps_train/policy_2_2": -149.25213623046875, "logps_train/policy_2_w": -204.27423095703125, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.6906245946884155, "rewards_train/1-l": -2.0248804092407227, "rewards_train/1-w": 1.899437665939331, "rewards_train/2-2": 1.89744234085083, "rewards_train/2-w": -1.1961721181869507, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9243180751800537, "rewards_train/margins_1": 2.5900622606277466, "rewards_train/margins_2": 3.0936144590377808, "step": 93 }, { "epoch": 0.28, "logps_train/policy_1_2": -209.0667724609375, "logps_train/policy_1_l": -169.2237548828125, "logps_train/policy_1_w": -119.57135009765625, "logps_train/policy_2_2": -158.1005859375, "logps_train/policy_2_w": -164.261474609375, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.0555057525634766, "rewards_train/1-l": -1.8049914836883545, "rewards_train/1-w": 1.862884283065796, "rewards_train/2-2": 2.0276358127593994, "rewards_train/2-w": -0.5029059648513794, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6678757667541504, "rewards_train/margins_1": 2.9183900356292725, "rewards_train/margins_2": 2.530541777610779, "step": 93 }, { "epoch": 0.28, "learning_rate": 4.891002460691306e-06, "loss": 1.1309, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -145.28335571289062, "logps_train/policy_1_l": -121.18026733398438, "logps_train/policy_1_w": -98.10645294189453, "logps_train/policy_2_2": -103.57215881347656, "logps_train/policy_2_w": -135.79945373535156, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -0.901383638381958, "rewards_train/1-l": -1.0295498371124268, "rewards_train/1-w": 1.3184564113616943, "rewards_train/2-2": 1.216612458229065, "rewards_train/2-w": -0.47994548082351685, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.348006248474121, "rewards_train/margins_1": 2.2198400497436523, "rewards_train/margins_2": 1.6965579390525818, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -180.79205322265625, "logps_train/policy_1_l": -205.8433837890625, "logps_train/policy_1_w": -94.13912963867188, "logps_train/policy_2_2": -127.5595474243164, "logps_train/policy_2_w": -125.58236694335938, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -1.0311579704284668, "rewards_train/1-l": -2.003087043762207, "rewards_train/1-w": 1.7751494646072388, "rewards_train/2-2": 1.971682071685791, "rewards_train/2-w": -0.0465179905295372, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.778236508369446, "rewards_train/margins_1": 2.8063074350357056, "rewards_train/margins_2": 2.018200062215328, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -107.85757446289062, "logps_train/policy_1_l": -122.9418716430664, "logps_train/policy_1_w": -89.65361022949219, "logps_train/policy_2_2": -85.13495635986328, "logps_train/policy_2_w": -123.44955444335938, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": -0.40841367840766907, "rewards_train/1-l": -0.8965060114860535, "rewards_train/1-w": 0.7475290298461914, "rewards_train/2-2": 0.7056451439857483, "rewards_train/2-w": -0.8963226079940796, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.6440350413322449, "rewards_train/margins_1": 1.1559427082538605, "rewards_train/margins_2": 1.6019677519798279, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -122.86747741699219, "logps_train/policy_1_l": -125.99980926513672, "logps_train/policy_1_w": -109.34962463378906, "logps_train/policy_2_2": -81.46261596679688, "logps_train/policy_2_w": -159.27984619140625, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.9812792539596558, "rewards_train/1-l": -1.298271656036377, "rewards_train/1-w": 1.3488261699676514, "rewards_train/2-2": 1.0582305192947388, "rewards_train/2-w": -1.1791558265686035, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.6470978260040283, "rewards_train/margins_1": 2.330105423927307, "rewards_train/margins_2": 2.2373863458633423, "step": 94 }, { "epoch": 0.28, "logps_train/policy_1_2": -190.7745361328125, "logps_train/policy_1_l": -214.63612365722656, "logps_train/policy_1_w": -184.93312072753906, "logps_train/policy_2_2": -143.03573608398438, "logps_train/policy_2_w": -250.19088745117188, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": -0.5016722679138184, "rewards_train/1-l": -2.2778706550598145, "rewards_train/1-w": 2.3277812004089355, "rewards_train/2-2": 2.109707832336426, "rewards_train/2-w": -1.1847132444381714, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.60565185546875, "rewards_train/margins_1": 2.829453468322754, "rewards_train/margins_2": 3.294421076774597, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -134.73611450195312, "logps_train/policy_1_l": -91.71614074707031, "logps_train/policy_1_w": -92.39224243164062, "logps_train/policy_2_2": -92.31326293945312, "logps_train/policy_2_w": -120.72483825683594, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": -0.8501242995262146, "rewards_train/1-l": -1.0071851015090942, "rewards_train/1-w": 1.1884857416152954, "rewards_train/2-2": 1.7132539749145508, "rewards_train/2-w": -0.26747894287109375, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.1956708431243896, "rewards_train/margins_1": 2.03861004114151, "rewards_train/margins_2": 1.9807329177856445, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -218.6458740234375, "logps_train/policy_1_l": -158.4573974609375, "logps_train/policy_1_w": -138.14157104492188, "logps_train/policy_2_2": -163.46499633789062, "logps_train/policy_2_w": -203.00250244140625, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.3825554847717285, "rewards_train/1-l": -1.474012851715088, "rewards_train/1-w": 1.7472681999206543, "rewards_train/2-2": 1.694515585899353, "rewards_train/2-w": -1.4979068040847778, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.221281051635742, "rewards_train/margins_1": 3.129823684692383, "rewards_train/margins_2": 3.192422389984131, "step": 95 }, { "epoch": 0.28, "logps_train/policy_1_2": -123.66775512695312, "logps_train/policy_1_l": -142.3338623046875, "logps_train/policy_1_w": -88.6181640625, "logps_train/policy_2_2": -91.13887023925781, "logps_train/policy_2_w": -120.99710845947266, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -0.8545687198638916, "rewards_train/1-l": -1.4047727584838867, "rewards_train/1-w": 0.9282236099243164, "rewards_train/2-2": 0.8070113658905029, "rewards_train/2-w": -0.6077190637588501, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.332996368408203, "rewards_train/margins_1": 1.782792329788208, "rewards_train/margins_2": 1.414730429649353, "step": 95 }, { "epoch": 0.29, "learning_rate": 4.883672664895761e-06, "loss": 1.3353, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -144.28631591796875, "logps_train/policy_1_l": -128.56838989257812, "logps_train/policy_1_w": -136.32943725585938, "logps_train/policy_2_2": -105.26252746582031, "logps_train/policy_2_w": -191.2932586669922, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -0.22394435107707977, "rewards_train/1-l": -1.428615927696228, "rewards_train/1-w": 2.4904942512512207, "rewards_train/2-2": 1.8034348487854004, "rewards_train/2-w": -0.7752243280410767, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9191101789474487, "rewards_train/margins_1": 2.7144386023283005, "rewards_train/margins_2": 2.578659176826477, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -170.72178649902344, "logps_train/policy_1_l": -175.536865234375, "logps_train/policy_1_w": -121.09031677246094, "logps_train/policy_2_2": -125.23448181152344, "logps_train/policy_2_w": -182.19691467285156, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.9463979005813599, "rewards_train/1-l": -1.3239021301269531, "rewards_train/1-w": 2.073878288269043, "rewards_train/2-2": 1.240027666091919, "rewards_train/2-w": -0.8706685900688171, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.397780418395996, "rewards_train/margins_1": 3.020276188850403, "rewards_train/margins_2": 2.110696256160736, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -154.9053192138672, "logps_train/policy_1_l": -148.25155639648438, "logps_train/policy_1_w": -133.09698486328125, "logps_train/policy_2_2": -115.24188232421875, "logps_train/policy_2_w": -190.22842407226562, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.2952193021774292, "rewards_train/1-l": -1.6324803829193115, "rewards_train/1-w": 2.062567710876465, "rewards_train/2-2": 1.906769037246704, "rewards_train/2-w": -1.3892478942871094, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6950480937957764, "rewards_train/margins_1": 2.357787013053894, "rewards_train/margins_2": 3.2960169315338135, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -127.06585693359375, "logps_train/policy_1_l": -146.40072631835938, "logps_train/policy_1_w": -149.85186767578125, "logps_train/policy_2_2": -89.57430267333984, "logps_train/policy_2_w": -206.396484375, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -0.46791332960128784, "rewards_train/1-l": -1.392026662826538, "rewards_train/1-w": 2.1874685287475586, "rewards_train/2-2": 1.424698829650879, "rewards_train/2-w": -1.1076189279556274, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.5794951915740967, "rewards_train/margins_1": 2.6553818583488464, "rewards_train/margins_2": 2.5323177576065063, "step": 96 }, { "epoch": 0.29, "logps_train/policy_1_2": -205.9743194580078, "logps_train/policy_1_l": -177.67349243164062, "logps_train/policy_1_w": -119.45356750488281, "logps_train/policy_2_2": -141.78318786621094, "logps_train/policy_2_w": -178.77700805664062, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.3302446603775024, "rewards_train/1-l": -1.4327781200408936, "rewards_train/1-w": 1.7253470420837402, "rewards_train/2-2": 2.3310556411743164, "rewards_train/2-w": -1.7261390686035156, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.158125162124634, "rewards_train/margins_1": 3.0555917024612427, "rewards_train/margins_2": 4.057194709777832, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -172.83233642578125, "logps_train/policy_1_l": -170.4469451904297, "logps_train/policy_1_w": -117.83380889892578, "logps_train/policy_2_2": -125.4353256225586, "logps_train/policy_2_w": -161.60150146484375, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -0.6277650594711304, "rewards_train/1-l": -1.5909833908081055, "rewards_train/1-w": 1.4071954488754272, "rewards_train/2-2": 1.716624140739441, "rewards_train/2-w": -0.8586622476577759, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9981788396835327, "rewards_train/margins_1": 2.0349605083465576, "rewards_train/margins_2": 2.575286388397217, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -137.64373779296875, "logps_train/policy_1_l": -146.54293823242188, "logps_train/policy_1_w": -131.6168212890625, "logps_train/policy_2_2": -105.10249328613281, "logps_train/policy_2_w": -179.00917053222656, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -0.6708188652992249, "rewards_train/1-l": -1.2103493213653564, "rewards_train/1-w": 1.4695677757263184, "rewards_train/2-2": 1.065335988998413, "rewards_train/2-w": -1.3858778476715088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.679917097091675, "rewards_train/margins_1": 2.140386641025543, "rewards_train/margins_2": 2.451213836669922, "step": 97 }, { "epoch": 0.29, "logps_train/policy_1_2": -195.69825744628906, "logps_train/policy_1_l": -194.12371826171875, "logps_train/policy_1_w": -130.40371704101562, "logps_train/policy_2_2": -145.093994140625, "logps_train/policy_2_w": -174.1249237060547, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.9436536431312561, "rewards_train/1-l": -2.264813184738159, "rewards_train/1-w": 1.6421968936920166, "rewards_train/2-2": 1.7999763488769531, "rewards_train/2-w": -0.39921140670776367, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.907010078430176, "rewards_train/margins_1": 2.5858505368232727, "rewards_train/margins_2": 2.199187755584717, "step": 97 }, { "epoch": 0.29, "learning_rate": 4.876110226850278e-06, "loss": 1.1674, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -178.05264282226562, "logps_train/policy_1_l": -154.59298706054688, "logps_train/policy_1_w": -159.22787475585938, "logps_train/policy_2_2": -135.40658569335938, "logps_train/policy_2_w": -201.44015502929688, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": 0.022079288959503174, "rewards_train/1-l": -1.0757540464401245, "rewards_train/1-w": 1.7974271774291992, "rewards_train/2-2": 2.143324613571167, "rewards_train/2-w": -0.288937509059906, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.8731812238693237, "rewards_train/margins_1": 1.775347888469696, "rewards_train/margins_2": 2.432262122631073, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -144.422607421875, "logps_train/policy_1_l": -173.5851287841797, "logps_train/policy_1_w": -144.4934539794922, "logps_train/policy_2_2": -110.34680938720703, "logps_train/policy_2_w": -189.79302978515625, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -0.06726023554801941, "rewards_train/1-l": -1.7437667846679688, "rewards_train/1-w": 1.7185745239257812, "rewards_train/2-2": 1.6972527503967285, "rewards_train/2-w": -0.9730538725852966, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.46234130859375, "rewards_train/margins_1": 1.7858347594738007, "rewards_train/margins_2": 2.670306622982025, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -125.68073272705078, "logps_train/policy_1_l": -104.084716796875, "logps_train/policy_1_w": -78.30952453613281, "logps_train/policy_2_2": -92.1230239868164, "logps_train/policy_2_w": -114.32252502441406, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -102.5, "rewards_train/1-2": -0.659870445728302, "rewards_train/1-l": -1.0385500192642212, "rewards_train/1-w": 0.6616747379302979, "rewards_train/2-2": 1.0669941902160645, "rewards_train/2-w": -1.2051043510437012, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 1.700224757194519, "rewards_train/margins_1": 1.3215451836585999, "rewards_train/margins_2": 2.2720985412597656, "step": 98 }, { "epoch": 0.29, "logps_train/policy_1_2": -109.70032501220703, "logps_train/policy_1_l": -95.05182647705078, "logps_train/policy_1_w": -63.08635330200195, "logps_train/policy_2_2": -79.32608795166016, "logps_train/policy_2_w": -93.57667541503906, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -73.5, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": -0.284875750541687, "rewards_train/1-l": -0.7471263408660889, "rewards_train/1-w": 1.0290601253509521, "rewards_train/2-2": 1.1380937099456787, "rewards_train/2-w": -0.4121595323085785, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.776186466217041, "rewards_train/margins_1": 1.3139358758926392, "rewards_train/margins_2": 1.5502532422542572, "step": 98 }, { "epoch": 0.3, "logps_train/policy_1_2": -185.66156005859375, "logps_train/policy_1_l": -144.3651885986328, "logps_train/policy_1_w": -120.89364624023438, "logps_train/policy_2_2": -136.73321533203125, "logps_train/policy_2_w": -167.79486083984375, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -0.4325622618198395, "rewards_train/1-l": -1.0871047973632812, "rewards_train/1-w": 1.9113187789916992, "rewards_train/2-2": 2.1118335723876953, "rewards_train/2-w": -0.4869072139263153, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.9984235763549805, "rewards_train/margins_1": 2.3438810408115387, "rewards_train/margins_2": 2.5987407863140106, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -179.9886474609375, "logps_train/policy_1_l": -156.12278747558594, "logps_train/policy_1_w": -118.29910278320312, "logps_train/policy_2_2": -139.6253662109375, "logps_train/policy_2_w": -167.1582794189453, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.3230843245983124, "rewards_train/1-l": -1.7386460304260254, "rewards_train/1-w": 2.133761405944824, "rewards_train/2-2": 1.6718382835388184, "rewards_train/2-w": -0.6556717753410339, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.8724074363708496, "rewards_train/margins_1": 2.4568457305431366, "rewards_train/margins_2": 2.3275100588798523, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -184.8125, "logps_train/policy_1_l": -153.14175415039062, "logps_train/policy_1_w": -146.80877685546875, "logps_train/policy_2_2": -141.31314086914062, "logps_train/policy_2_w": -201.05709838867188, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.6735600233078003, "rewards_train/1-l": -0.8416180610656738, "rewards_train/1-w": 1.8808417320251465, "rewards_train/2-2": 1.7659516334533691, "rewards_train/2-w": -0.8197734951972961, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.7224597930908203, "rewards_train/margins_1": 2.5544017553329468, "rewards_train/margins_2": 2.5857251286506653, "step": 99 }, { "epoch": 0.3, "logps_train/policy_1_2": -145.62826538085938, "logps_train/policy_1_l": -108.19497680664062, "logps_train/policy_1_w": -111.75541687011719, "logps_train/policy_2_2": -108.13850402832031, "logps_train/policy_2_w": -154.08447265625, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.2909510135650635, "rewards_train/1-l": -0.9563132524490356, "rewards_train/1-w": 1.313520908355713, "rewards_train/2-2": 1.5783369541168213, "rewards_train/2-w": -0.9986819624900818, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.2698341608047485, "rewards_train/margins_1": 1.6044719219207764, "rewards_train/margins_2": 2.577018916606903, "step": 99 }, { "epoch": 0.3, "learning_rate": 4.868315884635479e-06, "loss": 1.2427, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -139.0593719482422, "logps_train/policy_1_l": -148.47137451171875, "logps_train/policy_1_w": -93.97714233398438, "logps_train/policy_2_2": -99.25689697265625, "logps_train/policy_2_w": -131.73556518554688, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -0.10066337138414383, "rewards_train/1-l": -0.7722357511520386, "rewards_train/1-w": 1.2593170404434204, "rewards_train/2-2": 1.6885685920715332, "rewards_train/2-w": -0.232932448387146, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.031552791595459, "rewards_train/margins_1": 1.3599804118275642, "rewards_train/margins_2": 1.9215010404586792, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -148.21478271484375, "logps_train/policy_1_l": -151.64492797851562, "logps_train/policy_1_w": -103.59147644042969, "logps_train/policy_2_2": -107.47904205322266, "logps_train/policy_2_w": -144.52041625976562, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -0.2150338590145111, "rewards_train/1-l": -1.5215731859207153, "rewards_train/1-w": 1.5107746124267578, "rewards_train/2-2": 2.104243755340576, "rewards_train/2-w": -0.7827052474021912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.032347798347473, "rewards_train/margins_1": 1.725808471441269, "rewards_train/margins_2": 2.8869490027427673, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -205.70648193359375, "logps_train/policy_1_l": -204.004638671875, "logps_train/policy_1_w": -166.07452392578125, "logps_train/policy_2_2": -157.2894287109375, "logps_train/policy_2_w": -219.09364318847656, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -0.7925233244895935, "rewards_train/1-l": -1.7878170013427734, "rewards_train/1-w": 2.540985584259033, "rewards_train/2-2": 1.7921503782272339, "rewards_train/2-w": 0.08985551446676254, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.328802585601807, "rewards_train/margins_1": 3.3335089087486267, "rewards_train/margins_2": 1.7022948637604713, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -176.86849975585938, "logps_train/policy_1_l": -174.5355682373047, "logps_train/policy_1_w": -105.25559997558594, "logps_train/policy_2_2": -140.41783142089844, "logps_train/policy_2_w": -142.465576171875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -0.4516935348510742, "rewards_train/1-l": -2.026115894317627, "rewards_train/1-w": 1.0770277976989746, "rewards_train/2-2": 1.094154715538025, "rewards_train/2-w": -0.939136803150177, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1031436920166016, "rewards_train/margins_1": 1.5287213325500488, "rewards_train/margins_2": 2.033291518688202, "step": 100 }, { "epoch": 0.3, "logps_train/policy_1_2": -155.85940551757812, "logps_train/policy_1_l": -182.94398498535156, "logps_train/policy_1_w": -153.67698669433594, "logps_train/policy_2_2": -107.67343139648438, "logps_train/policy_2_w": -217.1346435546875, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.1023472547531128, "rewards_train/1-l": -2.260805606842041, "rewards_train/1-w": 2.520387649536133, "rewards_train/2-2": 1.2600007057189941, "rewards_train/2-w": -1.394714593887329, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.781193256378174, "rewards_train/margins_1": 3.6227349042892456, "rewards_train/margins_2": 2.6547152996063232, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -154.33154296875, "logps_train/policy_1_l": -115.72109985351562, "logps_train/policy_1_w": -116.40670013427734, "logps_train/policy_2_2": -112.34058380126953, "logps_train/policy_2_w": -143.7438507080078, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.31870096921920776, "rewards_train/1-l": -1.1704012155532837, "rewards_train/1-w": 1.0095252990722656, "rewards_train/2-2": 2.00148868560791, "rewards_train/2-w": -0.22985334694385529, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.1799265146255493, "rewards_train/margins_1": 1.3282262682914734, "rewards_train/margins_2": 2.2313420325517654, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -246.6759033203125, "logps_train/policy_1_l": -132.24533081054688, "logps_train/policy_1_w": -128.9722137451172, "logps_train/policy_2_2": -159.489990234375, "logps_train/policy_2_w": -192.12081909179688, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -2.291027069091797, "rewards_train/1-l": -1.388901710510254, "rewards_train/1-w": 1.9387154579162598, "rewards_train/2-2": 2.4691641330718994, "rewards_train/2-w": -1.6995826959609985, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3276171684265137, "rewards_train/margins_1": 4.229742527008057, "rewards_train/margins_2": 4.168746829032898, "step": 101 }, { "epoch": 0.3, "logps_train/policy_1_2": -205.3283233642578, "logps_train/policy_1_l": -222.4574737548828, "logps_train/policy_1_w": -144.46844482421875, "logps_train/policy_2_2": -159.9713134765625, "logps_train/policy_2_w": -182.40097045898438, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -0.3332222104072571, "rewards_train/1-l": -2.1635212898254395, "rewards_train/1-w": 2.0000314712524414, "rewards_train/2-2": 2.3145875930786133, "rewards_train/2-w": -0.13658106327056885, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.163552761077881, "rewards_train/margins_1": 2.3332536816596985, "rewards_train/margins_2": 2.451168656349182, "step": 101 }, { "epoch": 0.31, "learning_rate": 4.860290398965423e-06, "loss": 1.2299, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -186.396240234375, "logps_train/policy_1_l": -164.26438903808594, "logps_train/policy_1_w": -131.93258666992188, "logps_train/policy_2_2": -136.2225341796875, "logps_train/policy_2_w": -172.3781280517578, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -0.5478264093399048, "rewards_train/1-l": -1.6283929347991943, "rewards_train/1-w": 2.4216837882995605, "rewards_train/2-2": 2.089465856552124, "rewards_train/2-w": 0.27947261929512024, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.050076723098755, "rewards_train/margins_1": 2.9695101976394653, "rewards_train/margins_2": 1.8099932372570038, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -172.6748809814453, "logps_train/policy_1_l": -208.1060333251953, "logps_train/policy_1_w": -159.3260498046875, "logps_train/policy_2_2": -117.98910522460938, "logps_train/policy_2_w": -243.8974609375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": -1.2065510749816895, "rewards_train/1-l": -1.9596271514892578, "rewards_train/1-w": 2.519153594970703, "rewards_train/2-2": 1.7264800071716309, "rewards_train/2-w": -2.047168493270874, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.478780746459961, "rewards_train/margins_1": 3.7257046699523926, "rewards_train/margins_2": 3.773648500442505, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -206.95033264160156, "logps_train/policy_1_l": -216.65676879882812, "logps_train/policy_1_w": -142.9158172607422, "logps_train/policy_2_2": -157.75021362304688, "logps_train/policy_2_w": -207.3177947998047, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -0.7656387090682983, "rewards_train/1-l": -1.6010280847549438, "rewards_train/1-w": 2.6792197227478027, "rewards_train/2-2": 1.8790297508239746, "rewards_train/2-w": -0.9263100624084473, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.280247807502747, "rewards_train/margins_1": 3.444858431816101, "rewards_train/margins_2": 2.805339813232422, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -146.69105529785156, "logps_train/policy_1_l": -170.95294189453125, "logps_train/policy_1_w": -119.7136459350586, "logps_train/policy_2_2": -106.59478759765625, "logps_train/policy_2_w": -164.9933319091797, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -0.9064110517501831, "rewards_train/1-l": -1.4146301746368408, "rewards_train/1-w": 2.1188697814941406, "rewards_train/2-2": 1.2030220031738281, "rewards_train/2-w": -0.3989419937133789, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.5334999561309814, "rewards_train/margins_1": 3.0252808332443237, "rewards_train/margins_2": 1.601963996887207, "step": 102 }, { "epoch": 0.31, "logps_train/policy_1_2": -103.3647689819336, "logps_train/policy_1_l": -65.9932861328125, "logps_train/policy_1_w": -74.01461791992188, "logps_train/policy_2_2": -76.23828125, "logps_train/policy_2_w": -94.84294128417969, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -60.25, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -95.5, "rewards_train/1-2": -0.509914219379425, "rewards_train/1-l": -0.5831172466278076, "rewards_train/1-w": 1.1336944103240967, "rewards_train/2-2": 0.8675782084465027, "rewards_train/2-w": 0.06746385246515274, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 1.7168116569519043, "rewards_train/margins_1": 1.6436086297035217, "rewards_train/margins_2": 0.80011435598135, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -137.10455322265625, "logps_train/policy_1_l": -133.6339569091797, "logps_train/policy_1_w": -95.39994812011719, "logps_train/policy_2_2": -92.68923950195312, "logps_train/policy_2_w": -133.44097900390625, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -1.4633839130401611, "rewards_train/1-l": -1.3905441761016846, "rewards_train/1-w": 1.650826334953308, "rewards_train/2-2": 1.4009981155395508, "rewards_train/2-w": -0.33394166827201843, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.0413705110549927, "rewards_train/margins_1": 3.1142102479934692, "rewards_train/margins_2": 1.7349397838115692, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -182.05938720703125, "logps_train/policy_1_l": -191.6944122314453, "logps_train/policy_1_w": -111.2629623413086, "logps_train/policy_2_2": -143.07101440429688, "logps_train/policy_2_w": -146.24810791015625, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -0.14578379690647125, "rewards_train/1-l": -2.131159782409668, "rewards_train/1-w": 1.9322974681854248, "rewards_train/2-2": 2.1335232257843018, "rewards_train/2-w": 0.05253351479768753, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.063457250595093, "rewards_train/margins_1": 2.078081265091896, "rewards_train/margins_2": 2.0809897109866142, "step": 103 }, { "epoch": 0.31, "logps_train/policy_1_2": -128.3194580078125, "logps_train/policy_1_l": -69.87220001220703, "logps_train/policy_1_w": -56.74565505981445, "logps_train/policy_2_2": -90.9125747680664, "logps_train/policy_2_w": -83.46744537353516, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -62.25, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": -0.3710554242134094, "rewards_train/1-l": -0.7588506937026978, "rewards_train/1-w": 0.9457472562789917, "rewards_train/2-2": 1.6322286128997803, "rewards_train/2-w": -0.4674471914768219, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 1.7045979499816895, "rewards_train/margins_1": 1.3168026804924011, "rewards_train/margins_2": 2.099675804376602, "step": 103 }, { "epoch": 0.31, "learning_rate": 4.852034553113364e-06, "loss": 1.2283, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -147.01846313476562, "logps_train/policy_1_l": -159.4899139404297, "logps_train/policy_1_w": -134.21575927734375, "logps_train/policy_2_2": -119.14216613769531, "logps_train/policy_2_w": -171.3128662109375, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.14579267799854279, "rewards_train/1-l": -1.1856135129928589, "rewards_train/1-w": 1.969050407409668, "rewards_train/2-2": 1.3347084522247314, "rewards_train/2-w": -0.11839443445205688, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.154663920402527, "rewards_train/margins_1": 2.1148430854082108, "rewards_train/margins_2": 1.4531028866767883, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -246.34405517578125, "logps_train/policy_1_l": -186.85618591308594, "logps_train/policy_1_w": -134.3907928466797, "logps_train/policy_2_2": -170.56573486328125, "logps_train/policy_2_w": -196.16278076171875, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.5328437089920044, "rewards_train/1-l": -2.024486541748047, "rewards_train/1-w": 1.586115837097168, "rewards_train/2-2": 2.8504579067230225, "rewards_train/2-w": -1.8162775039672852, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.610602378845215, "rewards_train/margins_1": 3.1189595460891724, "rewards_train/margins_2": 4.666735410690308, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -212.3402862548828, "logps_train/policy_1_l": -193.5930633544922, "logps_train/policy_1_w": -141.36849975585938, "logps_train/policy_2_2": -159.69009399414062, "logps_train/policy_2_w": -195.18179321289062, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.6355916261672974, "rewards_train/1-l": -1.6603813171386719, "rewards_train/1-w": 2.5658836364746094, "rewards_train/2-2": 2.4532558917999268, "rewards_train/2-w": -0.4146634340286255, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.226264953613281, "rewards_train/margins_1": 3.2014752626419067, "rewards_train/margins_2": 2.8679193258285522, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -160.62921142578125, "logps_train/policy_1_l": -146.13897705078125, "logps_train/policy_1_w": -118.91549682617188, "logps_train/policy_2_2": -123.96651458740234, "logps_train/policy_2_w": -156.61251831054688, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.20120176672935486, "rewards_train/1-l": -1.1068668365478516, "rewards_train/1-w": 1.744387149810791, "rewards_train/2-2": 1.6779577732086182, "rewards_train/2-w": -0.2684788405895233, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.8512539863586426, "rewards_train/margins_1": 1.9455889165401459, "rewards_train/margins_2": 1.9464366137981415, "step": 104 }, { "epoch": 0.31, "logps_train/policy_1_2": -224.07748413085938, "logps_train/policy_1_l": -204.32064819335938, "logps_train/policy_1_w": -134.81423950195312, "logps_train/policy_2_2": -155.97796630859375, "logps_train/policy_2_w": -211.40548706054688, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.9850916862487793, "rewards_train/1-l": -1.7244470119476318, "rewards_train/1-w": 1.6400604248046875, "rewards_train/2-2": 1.8291575908660889, "rewards_train/2-w": -2.3174052238464355, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.3645074367523193, "rewards_train/margins_1": 3.625152111053467, "rewards_train/margins_2": 4.146562814712524, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -193.83084106445312, "logps_train/policy_1_l": -184.4801025390625, "logps_train/policy_1_w": -131.72933959960938, "logps_train/policy_2_2": -141.16305541992188, "logps_train/policy_2_w": -177.05029296875, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.090114712715149, "rewards_train/1-l": -1.484631061553955, "rewards_train/1-w": 1.6903469562530518, "rewards_train/2-2": 1.8722693920135498, "rewards_train/2-w": -0.7997549772262573, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.174978017807007, "rewards_train/margins_1": 2.7804616689682007, "rewards_train/margins_2": 2.672024369239807, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -147.56825256347656, "logps_train/policy_1_l": -117.57746887207031, "logps_train/policy_1_w": -122.46564483642578, "logps_train/policy_2_2": -96.89787292480469, "logps_train/policy_2_w": -185.83563232421875, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.6560442447662354, "rewards_train/1-l": -1.2652664184570312, "rewards_train/1-w": 2.0083186626434326, "rewards_train/2-2": 1.1987872123718262, "rewards_train/2-w": -1.7329771518707275, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.273585081100464, "rewards_train/margins_1": 3.664362907409668, "rewards_train/margins_2": 2.9317643642425537, "step": 105 }, { "epoch": 0.31, "logps_train/policy_1_2": -159.75765991210938, "logps_train/policy_1_l": -139.09494018554688, "logps_train/policy_1_w": -141.50033569335938, "logps_train/policy_2_2": -115.7231216430664, "logps_train/policy_2_w": -205.59861755371094, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.828599750995636, "rewards_train/1-l": -1.2450909614562988, "rewards_train/1-w": 1.9775052070617676, "rewards_train/2-2": 1.3781757354736328, "rewards_train/2-w": -1.468260645866394, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.2225961685180664, "rewards_train/margins_1": 2.8061049580574036, "rewards_train/margins_2": 2.846436381340027, "step": 105 }, { "epoch": 0.32, "learning_rate": 4.843549152835303e-06, "loss": 1.1797, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -190.81410217285156, "logps_train/policy_1_l": -189.00399780273438, "logps_train/policy_1_w": -141.9587860107422, "logps_train/policy_2_2": -133.55447387695312, "logps_train/policy_2_w": -201.05984497070312, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -0.936098575592041, "rewards_train/1-l": -0.9412798285484314, "rewards_train/1-w": 2.100214958190918, "rewards_train/2-2": 2.0961151123046875, "rewards_train/2-w": -0.7403603792190552, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0414947867393494, "rewards_train/margins_1": 3.036313533782959, "rewards_train/margins_2": 2.8364754915237427, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -187.66119384765625, "logps_train/policy_1_l": -157.5135040283203, "logps_train/policy_1_w": -164.89378356933594, "logps_train/policy_2_2": -138.42694091796875, "logps_train/policy_2_w": -215.46311950683594, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -0.5456120371818542, "rewards_train/1-l": -1.2150226831436157, "rewards_train/1-w": 2.0900650024414062, "rewards_train/2-2": 2.498223304748535, "rewards_train/2-w": -0.8060780763626099, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.305087685585022, "rewards_train/margins_1": 2.6356770396232605, "rewards_train/margins_2": 3.304301381111145, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -137.6077880859375, "logps_train/policy_1_l": -170.80404663085938, "logps_train/policy_1_w": -138.59259033203125, "logps_train/policy_2_2": -102.59651184082031, "logps_train/policy_2_w": -185.1259765625, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.027185484766960144, "rewards_train/1-l": -2.251008987426758, "rewards_train/1-w": 2.228436231613159, "rewards_train/2-2": 1.8544113636016846, "rewards_train/2-w": -0.2054700255393982, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.479445219039917, "rewards_train/margins_1": 2.2556217163801193, "rewards_train/margins_2": 2.0598813891410828, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -239.18280029296875, "logps_train/policy_1_l": -219.1569061279297, "logps_train/policy_1_w": -166.77880859375, "logps_train/policy_2_2": -185.4869384765625, "logps_train/policy_2_w": -229.21002197265625, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -0.7842955589294434, "rewards_train/1-l": -2.4156904220581055, "rewards_train/1-w": 2.2150866985321045, "rewards_train/2-2": 2.3864622116088867, "rewards_train/2-w": -1.1155331134796143, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.63077712059021, "rewards_train/margins_1": 2.999382257461548, "rewards_train/margins_2": 3.501995325088501, "step": 106 }, { "epoch": 0.32, "logps_train/policy_1_2": -196.51119995117188, "logps_train/policy_1_l": -167.8339385986328, "logps_train/policy_1_w": -132.14675903320312, "logps_train/policy_2_2": -138.8934326171875, "logps_train/policy_2_w": -183.4104766845703, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -0.48744848370552063, "rewards_train/1-l": -1.7131303548812866, "rewards_train/1-w": 1.8513386249542236, "rewards_train/2-2": 2.7231578826904297, "rewards_train/2-w": -0.789681077003479, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.5644689798355103, "rewards_train/margins_1": 2.3387871086597443, "rewards_train/margins_2": 3.5128389596939087, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -180.04180908203125, "logps_train/policy_1_l": -180.60525512695312, "logps_train/policy_1_w": -118.136962890625, "logps_train/policy_2_2": -125.94064331054688, "logps_train/policy_2_w": -167.63406372070312, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.0272281169891357, "rewards_train/1-l": -1.6133822202682495, "rewards_train/1-w": 2.3163819313049316, "rewards_train/2-2": 1.7574979066848755, "rewards_train/2-w": -0.43684351444244385, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.929764151573181, "rewards_train/margins_1": 3.3436100482940674, "rewards_train/margins_2": 2.1943414211273193, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -232.80645751953125, "logps_train/policy_1_l": -178.714111328125, "logps_train/policy_1_w": -125.8625717163086, "logps_train/policy_2_2": -161.21054077148438, "logps_train/policy_2_w": -189.49114990234375, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.6521296501159668, "rewards_train/1-l": -1.184107780456543, "rewards_train/1-w": 1.9604219198226929, "rewards_train/2-2": 2.711367130279541, "rewards_train/2-w": -1.344818353652954, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.144529700279236, "rewards_train/margins_1": 3.6125515699386597, "rewards_train/margins_2": 4.056185483932495, "step": 107 }, { "epoch": 0.32, "logps_train/policy_1_2": -172.2960968017578, "logps_train/policy_1_l": -196.45291137695312, "logps_train/policy_1_w": -127.00872802734375, "logps_train/policy_2_2": -128.62765502929688, "logps_train/policy_2_w": -177.1100616455078, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.38390618562698364, "rewards_train/1-l": -1.431618332862854, "rewards_train/1-w": 1.5368232727050781, "rewards_train/2-2": 1.9106732606887817, "rewards_train/2-w": -1.1000683307647705, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.968441605567932, "rewards_train/margins_1": 1.9207294583320618, "rewards_train/margins_2": 3.0107415914535522, "step": 107 }, { "epoch": 0.32, "learning_rate": 4.834835026291348e-06, "loss": 1.0969, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -192.27542114257812, "logps_train/policy_1_l": -144.62986755371094, "logps_train/policy_1_w": -97.67672729492188, "logps_train/policy_2_2": -135.00704956054688, "logps_train/policy_2_w": -144.4366455078125, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -0.5392616391181946, "rewards_train/1-l": -1.3834946155548096, "rewards_train/1-w": 1.3252956867218018, "rewards_train/2-2": 2.5891382694244385, "rewards_train/2-w": -1.2104631662368774, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.7087903022766113, "rewards_train/margins_1": 1.8645573258399963, "rewards_train/margins_2": 3.799601435661316, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -115.23503112792969, "logps_train/policy_1_l": -141.5187530517578, "logps_train/policy_1_w": -64.62417602539062, "logps_train/policy_2_2": -76.4769287109375, "logps_train/policy_2_w": -101.2451400756836, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": -0.6656914949417114, "rewards_train/1-l": -1.2744340896606445, "rewards_train/1-w": 1.5350431203842163, "rewards_train/2-2": 1.4894161224365234, "rewards_train/2-w": -0.36689677834510803, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.809477210044861, "rewards_train/margins_1": 2.2007346153259277, "rewards_train/margins_2": 1.8563129007816315, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -118.83499145507812, "logps_train/policy_1_l": -76.92418670654297, "logps_train/policy_1_w": -93.49407958984375, "logps_train/policy_2_2": -79.05433654785156, "logps_train/policy_2_w": -139.34127807617188, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -0.3163118362426758, "rewards_train/1-l": -0.5580437779426575, "rewards_train/1-w": 1.694244146347046, "rewards_train/2-2": 1.737534523010254, "rewards_train/2-w": -0.4102987051010132, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.2522879242897034, "rewards_train/margins_1": 2.0105559825897217, "rewards_train/margins_2": 2.147833228111267, "step": 108 }, { "epoch": 0.32, "logps_train/policy_1_2": -114.2066650390625, "logps_train/policy_1_l": -142.24002075195312, "logps_train/policy_1_w": -118.03046417236328, "logps_train/policy_2_2": -81.5841064453125, "logps_train/policy_2_w": -161.017822265625, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.2003539800643921, "rewards_train/1-l": -1.2917749881744385, "rewards_train/1-w": 1.9055476188659668, "rewards_train/2-2": 1.5036982297897339, "rewards_train/2-w": -0.41896945238113403, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.1973226070404053, "rewards_train/margins_1": 2.105901598930359, "rewards_train/margins_2": 1.922667682170868, "step": 108 }, { "epoch": 0.33, "logps_train/policy_1_2": -124.83263397216797, "logps_train/policy_1_l": -136.5576171875, "logps_train/policy_1_w": -123.7218246459961, "logps_train/policy_2_2": -90.54800415039062, "logps_train/policy_2_w": -169.25503540039062, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 0.2476934790611267, "rewards_train/1-l": -1.2837893962860107, "rewards_train/1-w": 2.2967629432678223, "rewards_train/2-2": 2.0366058349609375, "rewards_train/2-w": 0.001839917153120041, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.580552339553833, "rewards_train/margins_1": 2.0490694642066956, "rewards_train/margins_2": 2.0347659178078175, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -139.7324981689453, "logps_train/policy_1_l": -166.17706298828125, "logps_train/policy_1_w": -129.99203491210938, "logps_train/policy_2_2": -103.828125, "logps_train/policy_2_w": -176.310546875, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": 0.1867121160030365, "rewards_train/1-l": -1.2916812896728516, "rewards_train/1-w": 2.4697422981262207, "rewards_train/2-2": 2.171875476837158, "rewards_train/2-w": -0.07519611716270447, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.7614235877990723, "rewards_train/margins_1": 2.283030182123184, "rewards_train/margins_2": 2.2470715939998627, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -120.8062744140625, "logps_train/policy_1_l": -87.27584075927734, "logps_train/policy_1_w": -72.1928482055664, "logps_train/policy_2_2": -80.58001708984375, "logps_train/policy_2_w": -112.24263000488281, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": -0.8007946610450745, "rewards_train/1-l": -1.0094196796417236, "rewards_train/1-w": 1.4957541227340698, "rewards_train/2-2": 1.455597162246704, "rewards_train/2-w": -0.5764108896255493, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.5051738023757935, "rewards_train/margins_1": 2.2965487837791443, "rewards_train/margins_2": 2.0320080518722534, "step": 109 }, { "epoch": 0.33, "logps_train/policy_1_2": -196.7836151123047, "logps_train/policy_1_l": -207.753662109375, "logps_train/policy_1_w": -162.36932373046875, "logps_train/policy_2_2": -135.8034210205078, "logps_train/policy_2_w": -244.53335571289062, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.0168379545211792, "rewards_train/1-l": -1.9407970905303955, "rewards_train/1-w": 2.7504708766937256, "rewards_train/2-2": 2.285088300704956, "rewards_train/2-w": -1.8674949407577515, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.691267967224121, "rewards_train/margins_1": 3.767308831214905, "rewards_train/margins_2": 4.1525832414627075, "step": 109 }, { "epoch": 0.33, "learning_rate": 4.825893023964886e-06, "loss": 1.1822, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -182.50692749023438, "logps_train/policy_1_l": -184.3254852294922, "logps_train/policy_1_w": -143.4689178466797, "logps_train/policy_2_2": -130.1161346435547, "logps_train/policy_2_w": -192.00009155273438, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -0.6522551774978638, "rewards_train/1-l": -1.9095025062561035, "rewards_train/1-w": 1.811506748199463, "rewards_train/2-2": 2.068464994430542, "rewards_train/2-w": -0.8109463453292847, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7210092544555664, "rewards_train/margins_1": 2.4637619256973267, "rewards_train/margins_2": 2.8794113397598267, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -171.0704803466797, "logps_train/policy_1_l": -159.03024291992188, "logps_train/policy_1_w": -117.04615020751953, "logps_train/policy_2_2": -126.13715362548828, "logps_train/policy_2_w": -158.38287353515625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.7730147838592529, "rewards_train/1-l": -1.5417437553405762, "rewards_train/1-w": 1.7553945779800415, "rewards_train/2-2": 1.8913629055023193, "rewards_train/2-w": -0.40547582507133484, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2971383333206177, "rewards_train/margins_1": 2.5284093618392944, "rewards_train/margins_2": 2.296838730573654, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -269.7200927734375, "logps_train/policy_1_l": -245.8302001953125, "logps_train/policy_1_w": -152.03289794921875, "logps_train/policy_2_2": -177.18505859375, "logps_train/policy_2_w": -230.97683715820312, "logps_train/ref_1_2": -243.0, "logps_train/ref_1_l": -229.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -2.69818115234375, "rewards_train/1-l": -1.6753532886505127, "rewards_train/1-w": 2.3049139976501465, "rewards_train/2-2": 2.464695692062378, "rewards_train/2-w": -2.0515894889831543, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.980267286300659, "rewards_train/margins_1": 5.0030951499938965, "rewards_train/margins_2": 4.516285181045532, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -137.37860107421875, "logps_train/policy_1_l": -102.6478271484375, "logps_train/policy_1_w": -107.1002197265625, "logps_train/policy_2_2": -103.14881896972656, "logps_train/policy_2_w": -145.92022705078125, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -0.4052923619747162, "rewards_train/1-l": -0.5451775789260864, "rewards_train/1-w": 1.699352502822876, "rewards_train/2-2": 1.2076765298843384, "rewards_train/2-w": -0.07795888185501099, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.2445300817489624, "rewards_train/margins_1": 2.104644864797592, "rewards_train/margins_2": 1.2856354117393494, "step": 110 }, { "epoch": 0.33, "logps_train/policy_1_2": -227.652587890625, "logps_train/policy_1_l": -189.17420959472656, "logps_train/policy_1_w": -158.70004272460938, "logps_train/policy_2_2": -160.35084533691406, "logps_train/policy_2_w": -218.5102996826172, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.1519780158996582, "rewards_train/1-l": -2.2002344131469727, "rewards_train/1-w": 1.7319483757019043, "rewards_train/2-2": 2.4453845024108887, "rewards_train/2-w": -1.4211478233337402, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.932182788848877, "rewards_train/margins_1": 2.8839263916015625, "rewards_train/margins_2": 3.866532325744629, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -270.96978759765625, "logps_train/policy_1_l": -210.3660888671875, "logps_train/policy_1_w": -149.88265991210938, "logps_train/policy_2_2": -189.05657958984375, "logps_train/policy_2_w": -207.89456176757812, "logps_train/ref_1_2": -245.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -2.617680788040161, "rewards_train/1-l": -2.226062536239624, "rewards_train/1-w": 1.8922019004821777, "rewards_train/2-2": 2.1873583793640137, "rewards_train/2-w": -0.9683617353439331, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.118264436721802, "rewards_train/margins_1": 4.509882688522339, "rewards_train/margins_2": 3.1557201147079468, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -192.840087890625, "logps_train/policy_1_l": -202.45228576660156, "logps_train/policy_1_w": -136.67745971679688, "logps_train/policy_2_2": -133.77188110351562, "logps_train/policy_2_w": -211.33485412597656, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -1.0238518714904785, "rewards_train/1-l": -2.372572422027588, "rewards_train/1-w": 2.308084726333618, "rewards_train/2-2": 2.1763267517089844, "rewards_train/2-w": -1.634852409362793, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.680657148361206, "rewards_train/margins_1": 3.3319365978240967, "rewards_train/margins_2": 3.8111791610717773, "step": 111 }, { "epoch": 0.33, "logps_train/policy_1_2": -164.12530517578125, "logps_train/policy_1_l": -180.8031005859375, "logps_train/policy_1_w": -136.62388610839844, "logps_train/policy_2_2": -128.6730194091797, "logps_train/policy_2_w": -178.49301147460938, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.18362490832805634, "rewards_train/1-l": -1.1680541038513184, "rewards_train/1-w": 1.2914204597473145, "rewards_train/2-2": 1.6858227252960205, "rewards_train/2-w": -0.7804532051086426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.459474563598633, "rewards_train/margins_1": 1.4750453680753708, "rewards_train/margins_2": 2.466275930404663, "step": 111 }, { "epoch": 0.34, "learning_rate": 4.816724018579584e-06, "loss": 1.0721, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -157.69345092773438, "logps_train/policy_1_l": -183.02737426757812, "logps_train/policy_1_w": -150.55300903320312, "logps_train/policy_2_2": -106.53246307373047, "logps_train/policy_2_w": -212.70675659179688, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.0377057790756226, "rewards_train/1-l": -1.6125023365020752, "rewards_train/1-w": 2.516573429107666, "rewards_train/2-2": 1.8362067937850952, "rewards_train/2-w": -0.9238005876541138, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.129075765609741, "rewards_train/margins_1": 3.5542792081832886, "rewards_train/margins_2": 2.760007381439209, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -144.67660522460938, "logps_train/policy_1_l": -138.44747924804688, "logps_train/policy_1_w": -106.70387268066406, "logps_train/policy_2_2": -99.90750122070312, "logps_train/policy_2_w": -144.31875610351562, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -0.867466151714325, "rewards_train/1-l": -1.317209005355835, "rewards_train/1-w": 1.3952865600585938, "rewards_train/2-2": 1.6946510076522827, "rewards_train/2-w": -0.67118239402771, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.7124955654144287, "rewards_train/margins_1": 2.2627527117729187, "rewards_train/margins_2": 2.3658334016799927, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -170.75607299804688, "logps_train/policy_1_l": -168.1520233154297, "logps_train/policy_1_w": -115.43010711669922, "logps_train/policy_2_2": -120.50752258300781, "logps_train/policy_2_w": -164.8580780029297, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.6959197521209717, "rewards_train/1-l": -2.0392262935638428, "rewards_train/1-w": 1.954742670059204, "rewards_train/2-2": 1.9289352893829346, "rewards_train/2-w": -0.43766382336616516, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.993968963623047, "rewards_train/margins_1": 2.650662422180176, "rewards_train/margins_2": 2.3665991127490997, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -203.64022827148438, "logps_train/policy_1_l": -180.14340209960938, "logps_train/policy_1_w": -126.13980102539062, "logps_train/policy_2_2": -154.58396911621094, "logps_train/policy_2_w": -174.27938842773438, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.5460555553436279, "rewards_train/1-l": -1.3530113697052002, "rewards_train/1-w": 1.9569180011749268, "rewards_train/2-2": 2.379102945327759, "rewards_train/2-w": -0.8486427068710327, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.309929370880127, "rewards_train/margins_1": 2.5029735565185547, "rewards_train/margins_2": 3.2277456521987915, "step": 112 }, { "epoch": 0.34, "logps_train/policy_1_2": -181.8348388671875, "logps_train/policy_1_l": -194.40557861328125, "logps_train/policy_1_w": -119.16542053222656, "logps_train/policy_2_2": -129.91763305664062, "logps_train/policy_2_w": -179.74224853515625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.221179485321045, "rewards_train/1-l": -2.1161446571350098, "rewards_train/1-w": 1.9986923933029175, "rewards_train/2-2": 1.8744473457336426, "rewards_train/2-w": -1.1296937465667725, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.114837050437927, "rewards_train/margins_1": 3.2198718786239624, "rewards_train/margins_2": 3.004141092300415, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -135.78048706054688, "logps_train/policy_1_l": -133.76707458496094, "logps_train/policy_1_w": -100.35882568359375, "logps_train/policy_2_2": -87.32557678222656, "logps_train/policy_2_w": -142.35366821289062, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -0.4575408101081848, "rewards_train/1-l": -1.4403798580169678, "rewards_train/1-w": 1.6033751964569092, "rewards_train/2-2": 2.1414661407470703, "rewards_train/2-w": -0.5066558122634888, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.043755054473877, "rewards_train/margins_1": 2.060916006565094, "rewards_train/margins_2": 2.648121953010559, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -188.73947143554688, "logps_train/policy_1_l": -183.64044189453125, "logps_train/policy_1_w": -138.62155151367188, "logps_train/policy_2_2": -141.5593719482422, "logps_train/policy_2_w": -195.612548828125, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -0.8048067688941956, "rewards_train/1-l": -1.3618957996368408, "rewards_train/1-w": 1.9933137893676758, "rewards_train/2-2": 1.7116405963897705, "rewards_train/2-w": -0.8651612401008606, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.3552095890045166, "rewards_train/margins_1": 2.7981205582618713, "rewards_train/margins_2": 2.576801836490631, "step": 113 }, { "epoch": 0.34, "logps_train/policy_1_2": -220.51016235351562, "logps_train/policy_1_l": -186.16400146484375, "logps_train/policy_1_w": -136.7640380859375, "logps_train/policy_2_2": -160.88290405273438, "logps_train/policy_2_w": -217.33370971679688, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -0.9900776147842407, "rewards_train/1-l": -2.5212841033935547, "rewards_train/1-w": 2.554065704345703, "rewards_train/2-2": 2.252333879470825, "rewards_train/2-w": -2.0786831378936768, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.075349807739258, "rewards_train/margins_1": 3.544143319129944, "rewards_train/margins_2": 4.331017017364502, "step": 113 }, { "epoch": 0.34, "learning_rate": 4.807328905014201e-06, "loss": 1.1529, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -174.53060913085938, "logps_train/policy_1_l": -147.82443237304688, "logps_train/policy_1_w": -132.0877685546875, "logps_train/policy_2_2": -130.99478149414062, "logps_train/policy_2_w": -184.6606903076172, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -0.5507173538208008, "rewards_train/1-l": -1.2572965621948242, "rewards_train/1-w": 1.6017714738845825, "rewards_train/2-2": 2.196615219116211, "rewards_train/2-w": -1.3336472511291504, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.8590680360794067, "rewards_train/margins_1": 2.1524888277053833, "rewards_train/margins_2": 3.5302624702453613, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -190.20960998535156, "logps_train/policy_1_l": -161.54312133789062, "logps_train/policy_1_w": -141.19281005859375, "logps_train/policy_2_2": -142.408203125, "logps_train/policy_2_w": -200.21969604492188, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.42975008487701416, "rewards_train/1-l": -1.8677898645401, "rewards_train/1-w": 2.2115797996520996, "rewards_train/2-2": 2.0769522190093994, "rewards_train/2-w": -0.8196255564689636, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.0793696641922, "rewards_train/margins_1": 2.6413298845291138, "rewards_train/margins_2": 2.896577775478363, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -228.75640869140625, "logps_train/policy_1_l": -177.28530883789062, "logps_train/policy_1_w": -115.80172729492188, "logps_train/policy_2_2": -170.15655517578125, "logps_train/policy_2_w": -166.51507568359375, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.7920469045639038, "rewards_train/1-l": -1.898599624633789, "rewards_train/1-w": 2.2487335205078125, "rewards_train/2-2": 2.4603211879730225, "rewards_train/2-w": -0.07885220646858215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.147333145141602, "rewards_train/margins_1": 3.0407804250717163, "rewards_train/margins_2": 2.5391733944416046, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -151.27908325195312, "logps_train/policy_1_l": -159.38311767578125, "logps_train/policy_1_w": -124.2801513671875, "logps_train/policy_2_2": -106.36490631103516, "logps_train/policy_2_w": -179.29852294921875, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.8197052478790283, "rewards_train/1-l": -1.359576940536499, "rewards_train/1-w": 2.0028440952301025, "rewards_train/2-2": 1.6724934577941895, "rewards_train/2-w": -0.8790717720985413, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.3624210357666016, "rewards_train/margins_1": 2.822549343109131, "rewards_train/margins_2": 2.5515652298927307, "step": 114 }, { "epoch": 0.34, "logps_train/policy_1_2": -155.6685028076172, "logps_train/policy_1_l": -169.60885620117188, "logps_train/policy_1_w": -142.5835418701172, "logps_train/policy_2_2": -113.9968490600586, "logps_train/policy_2_w": -203.01034545898438, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -0.6191938519477844, "rewards_train/1-l": -1.5394017696380615, "rewards_train/1-w": 2.086177110671997, "rewards_train/2-2": 1.82844078540802, "rewards_train/2-w": -0.9822835922241211, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6255788803100586, "rewards_train/margins_1": 2.7053709626197815, "rewards_train/margins_2": 2.810724377632141, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -280.9844970703125, "logps_train/policy_1_l": -233.6044921875, "logps_train/policy_1_w": -182.8090362548828, "logps_train/policy_2_2": -195.6717071533203, "logps_train/policy_2_w": -274.4818420410156, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -255.0, "rewards_train/1-2": -2.3172006607055664, "rewards_train/1-l": -2.252636432647705, "rewards_train/1-w": 2.851128101348877, "rewards_train/2-2": 2.658219337463379, "rewards_train/2-w": -1.9606841802597046, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.103764533996582, "rewards_train/margins_1": 5.168328762054443, "rewards_train/margins_2": 4.6189035177230835, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -142.05709838867188, "logps_train/policy_1_l": -174.90817260742188, "logps_train/policy_1_w": -95.82243347167969, "logps_train/policy_2_2": -111.29901885986328, "logps_train/policy_2_w": -126.30647277832031, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -0.07343460619449615, "rewards_train/1-l": -1.637449026107788, "rewards_train/1-w": 1.6704418659210205, "rewards_train/2-2": 1.634990930557251, "rewards_train/2-w": -0.08521220088005066, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.3078908920288086, "rewards_train/margins_1": 1.7438764721155167, "rewards_train/margins_2": 1.7202031314373016, "step": 115 }, { "epoch": 0.34, "logps_train/policy_1_2": -142.7976531982422, "logps_train/policy_1_l": -192.44825744628906, "logps_train/policy_1_w": -105.58743286132812, "logps_train/policy_2_2": -105.6937255859375, "logps_train/policy_2_w": -149.4765625, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.3750775456428528, "rewards_train/1-l": -1.9986835718154907, "rewards_train/1-w": 1.4826624393463135, "rewards_train/2-2": 1.6267213821411133, "rewards_train/2-w": -0.839844822883606, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.481346011161804, "rewards_train/margins_1": 1.8577399849891663, "rewards_train/margins_2": 2.4665662050247192, "step": 115 }, { "epoch": 0.35, "learning_rate": 4.797708600215259e-06, "loss": 1.0717, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -179.7599334716797, "logps_train/policy_1_l": -186.61976623535156, "logps_train/policy_1_w": -131.50180053710938, "logps_train/policy_2_2": -122.88539123535156, "logps_train/policy_2_w": -198.51199340820312, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.0545090436935425, "rewards_train/1-l": -2.212661027908325, "rewards_train/1-w": 2.4486489295959473, "rewards_train/2-2": 1.9993526935577393, "rewards_train/2-w": -1.2840114831924438, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.6613099575042725, "rewards_train/margins_1": 3.5031579732894897, "rewards_train/margins_2": 3.283364176750183, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -177.55923461914062, "logps_train/policy_1_l": -144.25857543945312, "logps_train/policy_1_w": -122.20042419433594, "logps_train/policy_2_2": -125.66065979003906, "logps_train/policy_2_w": -165.18505859375, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.2925451397895813, "rewards_train/1-l": -1.6615023612976074, "rewards_train/1-w": 2.094409942626953, "rewards_train/2-2": 2.5280752182006836, "rewards_train/2-w": -0.2919427752494812, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7559123039245605, "rewards_train/margins_1": 2.3869550824165344, "rewards_train/margins_2": 2.820017993450165, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -165.91653442382812, "logps_train/policy_1_l": -155.72311401367188, "logps_train/policy_1_w": -122.78494262695312, "logps_train/policy_2_2": -113.88551330566406, "logps_train/policy_2_w": -173.14808654785156, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.8771995306015015, "rewards_train/1-l": -1.7488291263580322, "rewards_train/1-w": 2.243967056274414, "rewards_train/2-2": 2.182420253753662, "rewards_train/2-w": -0.6628562211990356, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9927961826324463, "rewards_train/margins_1": 3.1211665868759155, "rewards_train/margins_2": 2.8452764749526978, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -172.68533325195312, "logps_train/policy_1_l": -165.15957641601562, "logps_train/policy_1_w": -116.03258514404297, "logps_train/policy_2_2": -119.98584747314453, "logps_train/policy_2_w": -159.29159545898438, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.7646273374557495, "rewards_train/1-l": -1.536241054534912, "rewards_train/1-w": 1.8053354024887085, "rewards_train/2-2": 2.0068845748901367, "rewards_train/2-w": -0.1415630578994751, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.3415764570236206, "rewards_train/margins_1": 2.569962739944458, "rewards_train/margins_2": 2.148447632789612, "step": 116 }, { "epoch": 0.35, "logps_train/policy_1_2": -310.270751953125, "logps_train/policy_1_l": -294.74713134765625, "logps_train/policy_1_w": -134.31063842773438, "logps_train/policy_2_2": -214.97076416015625, "logps_train/policy_2_w": -207.47555541992188, "logps_train/ref_1_2": -282.0, "logps_train/ref_1_l": -262.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -247.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -2.9259049892425537, "rewards_train/1-l": -3.2210001945495605, "rewards_train/1-w": 2.0826072692871094, "rewards_train/2-2": 3.1888608932495117, "rewards_train/2-w": -1.8653290271759033, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.30360746383667, "rewards_train/margins_1": 5.008512258529663, "rewards_train/margins_2": 5.054189920425415, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -158.71072387695312, "logps_train/policy_1_l": -149.84861755371094, "logps_train/policy_1_w": -160.64915466308594, "logps_train/policy_2_2": -123.21786499023438, "logps_train/policy_2_w": -224.55758666992188, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -0.261696457862854, "rewards_train/1-l": -1.8116207122802734, "rewards_train/1-w": 2.7218031883239746, "rewards_train/2-2": 1.65946364402771, "rewards_train/2-w": -0.9963824152946472, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.533423900604248, "rewards_train/margins_1": 2.9834996461868286, "rewards_train/margins_2": 2.655846059322357, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -233.88966369628906, "logps_train/policy_1_l": -235.95458984375, "logps_train/policy_1_w": -183.18048095703125, "logps_train/policy_2_2": -173.93402099609375, "logps_train/policy_2_w": -247.96658325195312, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": -1.2151386737823486, "rewards_train/1-l": -2.863917112350464, "rewards_train/1-w": 2.7743353843688965, "rewards_train/2-2": 2.2626523971557617, "rewards_train/2-w": -1.0552524328231812, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.63825249671936, "rewards_train/margins_1": 3.989474058151245, "rewards_train/margins_2": 3.317904829978943, "step": 117 }, { "epoch": 0.35, "logps_train/policy_1_2": -112.22286987304688, "logps_train/policy_1_l": -103.73744201660156, "logps_train/policy_1_w": -76.47889709472656, "logps_train/policy_2_2": -87.42131042480469, "logps_train/policy_2_w": -102.9788818359375, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -101.5, "rewards_train/1-2": -0.06857684254646301, "rewards_train/1-l": -0.8175923824310303, "rewards_train/1-w": 0.973985493183136, "rewards_train/2-2": 1.1586503982543945, "rewards_train/2-w": -0.1631227284669876, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.7915778756141663, "rewards_train/margins_1": 1.042562335729599, "rewards_train/margins_2": 1.3217731267213821, "step": 117 }, { "epoch": 0.35, "learning_rate": 4.7878640431075466e-06, "loss": 1.0457, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -130.4310760498047, "logps_train/policy_1_l": -139.67239379882812, "logps_train/policy_1_w": -83.28402709960938, "logps_train/policy_2_2": -93.43203735351562, "logps_train/policy_2_w": -121.69194030761719, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": -0.5788502097129822, "rewards_train/1-l": -1.238308072090149, "rewards_train/1-w": 1.2408355474472046, "rewards_train/2-2": 1.1911711692810059, "rewards_train/2-w": -0.7688034176826477, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.4791436195373535, "rewards_train/margins_1": 1.8196857571601868, "rewards_train/margins_2": 1.9599745869636536, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -177.60238647460938, "logps_train/policy_1_l": -163.82611083984375, "logps_train/policy_1_w": -119.7891845703125, "logps_train/policy_2_2": -130.54318237304688, "logps_train/policy_2_w": -167.45602416992188, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -0.8607277870178223, "rewards_train/1-l": -1.5372986793518066, "rewards_train/1-w": 2.3425662517547607, "rewards_train/2-2": 1.5201950073242188, "rewards_train/2-w": -0.08876687288284302, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8798649311065674, "rewards_train/margins_1": 3.203294038772583, "rewards_train/margins_2": 1.6089618802070618, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -157.0557403564453, "logps_train/policy_1_l": -203.0711669921875, "logps_train/policy_1_w": -166.64044189453125, "logps_train/policy_2_2": -117.63348388671875, "logps_train/policy_2_w": -219.81185913085938, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.022956490516662598, "rewards_train/1-l": -2.145495891571045, "rewards_train/1-w": 2.334394931793213, "rewards_train/2-2": 2.2141904830932617, "rewards_train/2-w": -0.7399754524230957, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.479890823364258, "rewards_train/margins_1": 2.3573514223098755, "rewards_train/margins_2": 2.9541659355163574, "step": 118 }, { "epoch": 0.35, "logps_train/policy_1_2": -190.06948852539062, "logps_train/policy_1_l": -137.79629516601562, "logps_train/policy_1_w": -141.64434814453125, "logps_train/policy_2_2": -126.20329284667969, "logps_train/policy_2_w": -201.99160766601562, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.6096848249435425, "rewards_train/1-l": -1.8005270957946777, "rewards_train/1-w": 1.7074403762817383, "rewards_train/2-2": 1.9951001405715942, "rewards_train/2-w": -1.4975008964538574, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.507967472076416, "rewards_train/margins_1": 3.3171252012252808, "rewards_train/margins_2": 3.4926010370254517, "step": 118 }, { "epoch": 0.36, "logps_train/policy_1_2": -195.46063232421875, "logps_train/policy_1_l": -171.38385009765625, "logps_train/policy_1_w": -142.24945068359375, "logps_train/policy_2_2": -145.07928466796875, "logps_train/policy_2_w": -197.08224487304688, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.1042659282684326, "rewards_train/1-l": -1.8691470623016357, "rewards_train/1-w": 2.3131418228149414, "rewards_train/2-2": 1.8366024494171143, "rewards_train/2-w": -0.2810768485069275, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.182288885116577, "rewards_train/margins_1": 3.417407751083374, "rewards_train/margins_2": 2.1176792979240417, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -208.9921112060547, "logps_train/policy_1_l": -173.936767578125, "logps_train/policy_1_w": -106.91427612304688, "logps_train/policy_2_2": -144.43675231933594, "logps_train/policy_2_w": -167.82528686523438, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -2.1187429428100586, "rewards_train/1-l": -1.6928963661193848, "rewards_train/1-w": 2.07029128074646, "rewards_train/2-2": 1.9686295986175537, "rewards_train/2-w": -1.2655373811721802, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7631876468658447, "rewards_train/margins_1": 4.1890342235565186, "rewards_train/margins_2": 3.234166979789734, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -142.39321899414062, "logps_train/policy_1_l": -132.8550567626953, "logps_train/policy_1_w": -118.04400634765625, "logps_train/policy_2_2": -95.19487762451172, "logps_train/policy_2_w": -176.24798583984375, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -0.6944003105163574, "rewards_train/1-l": -1.7082695960998535, "rewards_train/1-w": 2.294525623321533, "rewards_train/2-2": 1.8086378574371338, "rewards_train/2-w": -0.8491151332855225, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.002795219421387, "rewards_train/margins_1": 2.9889259338378906, "rewards_train/margins_2": 2.6577529907226562, "step": 119 }, { "epoch": 0.36, "logps_train/policy_1_2": -179.22496032714844, "logps_train/policy_1_l": -141.20404052734375, "logps_train/policy_1_w": -131.33364868164062, "logps_train/policy_2_2": -141.08987426757812, "logps_train/policy_2_w": -180.01248168945312, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.10335567593574524, "rewards_train/1-l": -1.9216727018356323, "rewards_train/1-w": 2.3055028915405273, "rewards_train/2-2": 2.1270484924316406, "rewards_train/2-w": -0.8250754475593567, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.22717559337616, "rewards_train/margins_1": 2.4088585674762726, "rewards_train/margins_2": 2.9521239399909973, "step": 119 }, { "epoch": 0.36, "learning_rate": 4.7777961945024834e-06, "loss": 1.1438, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -168.87094116210938, "logps_train/policy_1_l": -184.5152130126953, "logps_train/policy_1_w": -111.5342788696289, "logps_train/policy_2_2": -125.87210083007812, "logps_train/policy_2_w": -171.81607055664062, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -0.5312355160713196, "rewards_train/1-l": -1.9612871408462524, "rewards_train/1-w": 2.034365177154541, "rewards_train/2-2": 1.812595248222351, "rewards_train/2-w": -1.5685209035873413, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9956523180007935, "rewards_train/margins_1": 2.5656006932258606, "rewards_train/margins_2": 3.3811161518096924, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -194.20230102539062, "logps_train/policy_1_l": -144.03675842285156, "logps_train/policy_1_w": -107.46188354492188, "logps_train/policy_2_2": -144.84307861328125, "logps_train/policy_2_w": -162.93325805664062, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -1.0753073692321777, "rewards_train/1-l": -1.0748672485351562, "rewards_train/1-w": 1.7956085205078125, "rewards_train/2-2": 1.7390319108963013, "rewards_train/2-w": -1.388443112373352, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.8704757690429688, "rewards_train/margins_1": 2.8709158897399902, "rewards_train/margins_2": 3.1274750232696533, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -156.15859985351562, "logps_train/policy_1_l": -152.50877380371094, "logps_train/policy_1_w": -103.71322631835938, "logps_train/policy_2_2": -119.25777435302734, "logps_train/policy_2_w": -146.69686889648438, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.39750030636787415, "rewards_train/1-l": -1.5266098976135254, "rewards_train/1-w": 1.7243802547454834, "rewards_train/2-2": 1.797660231590271, "rewards_train/2-w": -0.6247643232345581, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.250990152359009, "rewards_train/margins_1": 2.1218805611133575, "rewards_train/margins_2": 2.422424554824829, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -195.24839782714844, "logps_train/policy_1_l": -176.65859985351562, "logps_train/policy_1_w": -149.2401123046875, "logps_train/policy_2_2": -152.26760864257812, "logps_train/policy_2_w": -201.26828002929688, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -0.370151549577713, "rewards_train/1-l": -1.7773828506469727, "rewards_train/1-w": 2.3916149139404297, "rewards_train/2-2": 2.0740206241607666, "rewards_train/2-w": -0.5340547561645508, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.168997764587402, "rewards_train/margins_1": 2.7617664635181427, "rewards_train/margins_2": 2.6080753803253174, "step": 120 }, { "epoch": 0.36, "logps_train/policy_1_2": -185.2297821044922, "logps_train/policy_1_l": -221.7506103515625, "logps_train/policy_1_w": -145.1351776123047, "logps_train/policy_2_2": -138.00592041015625, "logps_train/policy_2_w": -202.12222290039062, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.0233677625656128, "rewards_train/1-l": -1.722473382949829, "rewards_train/1-w": 2.077106475830078, "rewards_train/2-2": 1.4201123714447021, "rewards_train/2-w": -0.9208171963691711, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7995798587799072, "rewards_train/margins_1": 3.100474238395691, "rewards_train/margins_2": 2.3409295678138733, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -162.703857421875, "logps_train/policy_1_l": -174.48834228515625, "logps_train/policy_1_w": -141.14791870117188, "logps_train/policy_2_2": -121.83940124511719, "logps_train/policy_2_w": -189.74371337890625, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -0.4326900243759155, "rewards_train/1-l": -1.8260810375213623, "rewards_train/1-w": 1.9287638664245605, "rewards_train/2-2": 1.647700309753418, "rewards_train/2-w": -1.1534737348556519, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.754844903945923, "rewards_train/margins_1": 2.361453890800476, "rewards_train/margins_2": 2.80117404460907, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -173.71673583984375, "logps_train/policy_1_l": -209.29481506347656, "logps_train/policy_1_w": -127.26416015625, "logps_train/policy_2_2": -123.78245544433594, "logps_train/policy_2_w": -184.58216857910156, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -0.9747987985610962, "rewards_train/1-l": -3.101747512817383, "rewards_train/1-w": 1.666454792022705, "rewards_train/2-2": 1.965895652770996, "rewards_train/2-w": -1.3920061588287354, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.768202304840088, "rewards_train/margins_1": 2.6412535905838013, "rewards_train/margins_2": 3.3579018115997314, "step": 121 }, { "epoch": 0.36, "logps_train/policy_1_2": -183.10081481933594, "logps_train/policy_1_l": -210.97203063964844, "logps_train/policy_1_w": -128.67506408691406, "logps_train/policy_2_2": -142.56723022460938, "logps_train/policy_2_w": -175.00564575195312, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.46555060148239136, "rewards_train/1-l": -1.880014419555664, "rewards_train/1-w": 2.147728443145752, "rewards_train/2-2": 1.8553858995437622, "rewards_train/2-w": -0.296268492937088, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.027742862701416, "rewards_train/margins_1": 2.6132790446281433, "rewards_train/margins_2": 2.15165439248085, "step": 121 }, { "epoch": 0.37, "learning_rate": 4.767506037004344e-06, "loss": 1.146, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -149.5859375, "logps_train/policy_1_l": -144.00393676757812, "logps_train/policy_1_w": -99.8121109008789, "logps_train/policy_2_2": -105.36447143554688, "logps_train/policy_2_w": -135.46975708007812, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -0.9351556897163391, "rewards_train/1-l": -1.9796907901763916, "rewards_train/1-w": 1.4672263860702515, "rewards_train/2-2": 1.65964674949646, "rewards_train/2-w": -0.575491726398468, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.446917176246643, "rewards_train/margins_1": 2.4023820757865906, "rewards_train/margins_2": 2.235138475894928, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -212.8235626220703, "logps_train/policy_1_l": -245.7263641357422, "logps_train/policy_1_w": -154.544921875, "logps_train/policy_2_2": -148.6534423828125, "logps_train/policy_2_w": -216.0308074951172, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -221.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.4835281372070312, "rewards_train/1-l": -2.5019326210021973, "rewards_train/1-w": 2.7638673782348633, "rewards_train/2-2": 2.338561773300171, "rewards_train/2-w": -0.9314022660255432, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.2657999992370605, "rewards_train/margins_1": 4.2473955154418945, "rewards_train/margins_2": 3.269964039325714, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -214.67837524414062, "logps_train/policy_1_l": -142.89749145507812, "logps_train/policy_1_w": -130.0361785888672, "logps_train/policy_2_2": -143.24729919433594, "logps_train/policy_2_w": -197.06103515625, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -1.1670565605163574, "rewards_train/1-l": -1.4036157131195068, "rewards_train/1-w": 2.456930160522461, "rewards_train/2-2": 2.8057384490966797, "rewards_train/2-w": -1.3857905864715576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8605458736419678, "rewards_train/margins_1": 3.6239867210388184, "rewards_train/margins_2": 4.191529035568237, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -132.7974395751953, "logps_train/policy_1_l": -99.80699920654297, "logps_train/policy_1_w": -87.87629699707031, "logps_train/policy_2_2": -108.47588348388672, "logps_train/policy_2_w": -113.15281677246094, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": 0.3709951937198639, "rewards_train/1-l": -0.6125784516334534, "rewards_train/1-w": 1.2888903617858887, "rewards_train/2-2": 1.6230478286743164, "rewards_train/2-w": -0.0036302506923675537, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 1.901468813419342, "rewards_train/margins_1": 0.9178951680660248, "rewards_train/margins_2": 1.626678079366684, "step": 122 }, { "epoch": 0.37, "logps_train/policy_1_2": -178.17991638183594, "logps_train/policy_1_l": -199.63096618652344, "logps_train/policy_1_w": -98.40865325927734, "logps_train/policy_2_2": -130.86056518554688, "logps_train/policy_2_w": -136.1082763671875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -0.6222877502441406, "rewards_train/1-l": -2.4712507724761963, "rewards_train/1-w": 1.4650425910949707, "rewards_train/2-2": 2.036208152770996, "rewards_train/2-w": -0.49227219820022583, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.936293363571167, "rewards_train/margins_1": 2.0873303413391113, "rewards_train/margins_2": 2.528480350971222, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -191.56146240234375, "logps_train/policy_1_l": -159.0145263671875, "logps_train/policy_1_w": -127.99847412109375, "logps_train/policy_2_2": -135.890869140625, "logps_train/policy_2_w": -187.27008056640625, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.4944272041320801, "rewards_train/1-l": -2.0036025047302246, "rewards_train/1-w": 2.2478084564208984, "rewards_train/2-2": 2.4886474609375, "rewards_train/2-w": -0.7549378871917725, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.251410961151123, "rewards_train/margins_1": 2.7422356605529785, "rewards_train/margins_2": 3.2435853481292725, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -203.4757080078125, "logps_train/policy_1_l": -148.8141326904297, "logps_train/policy_1_w": -107.78170776367188, "logps_train/policy_2_2": -137.4768524169922, "logps_train/policy_2_w": -165.54928588867188, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -1.5467896461486816, "rewards_train/1-l": -1.3792157173156738, "rewards_train/1-w": 1.5558135509490967, "rewards_train/2-2": 1.9835642576217651, "rewards_train/2-w": -1.4481912851333618, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.9350292682647705, "rewards_train/margins_1": 3.1026031970977783, "rewards_train/margins_2": 3.431755542755127, "step": 123 }, { "epoch": 0.37, "logps_train/policy_1_2": -215.87086486816406, "logps_train/policy_1_l": -185.8909912109375, "logps_train/policy_1_w": -96.71578979492188, "logps_train/policy_2_2": -151.3043670654297, "logps_train/policy_2_w": -145.22271728515625, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -2.278101921081543, "rewards_train/1-l": -2.2630746364593506, "rewards_train/1-w": 1.6439483165740967, "rewards_train/2-2": 1.688704252243042, "rewards_train/2-w": -1.0450252294540405, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9070229530334473, "rewards_train/margins_1": 3.9220502376556396, "rewards_train/margins_2": 2.7337294816970825, "step": 123 }, { "epoch": 0.37, "learning_rate": 4.756994574914359e-06, "loss": 1.1161, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -132.58346557617188, "logps_train/policy_1_l": -150.7397003173828, "logps_train/policy_1_w": -105.78081512451172, "logps_train/policy_2_2": -94.20640563964844, "logps_train/policy_2_w": -139.84864807128906, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -0.9163541793823242, "rewards_train/1-l": -1.7472130060195923, "rewards_train/1-w": 1.5283634662628174, "rewards_train/2-2": 0.9994286894798279, "rewards_train/2-w": -0.43916207551956177, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2755764722824097, "rewards_train/margins_1": 2.4447176456451416, "rewards_train/margins_2": 1.4385907649993896, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -160.14639282226562, "logps_train/policy_1_l": -126.28553009033203, "logps_train/policy_1_w": -111.94168090820312, "logps_train/policy_2_2": -110.9983901977539, "logps_train/policy_2_w": -155.53172302246094, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -0.8630757331848145, "rewards_train/1-l": -1.1712287664413452, "rewards_train/1-w": 2.4066131114959717, "rewards_train/2-2": 1.6415674686431885, "rewards_train/2-w": 0.0038593262434005737, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.577841877937317, "rewards_train/margins_1": 3.269688844680786, "rewards_train/margins_2": 1.637708142399788, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -207.0832977294922, "logps_train/policy_1_l": -163.5070343017578, "logps_train/policy_1_w": -137.25083923339844, "logps_train/policy_2_2": -152.25965881347656, "logps_train/policy_2_w": -178.94784545898438, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.0012974739074707, "rewards_train/1-l": -1.7841020822525024, "rewards_train/1-w": 1.7344858646392822, "rewards_train/2-2": 2.1154403686523438, "rewards_train/2-w": -0.5768159031867981, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.5185879468917847, "rewards_train/margins_1": 2.735783338546753, "rewards_train/margins_2": 2.692256271839142, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -193.0675048828125, "logps_train/policy_1_l": -133.44924926757812, "logps_train/policy_1_w": -106.3878173828125, "logps_train/policy_2_2": -141.1986541748047, "logps_train/policy_2_w": -148.89862060546875, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.6352648735046387, "rewards_train/1-l": -1.4167509078979492, "rewards_train/1-w": 1.506481647491455, "rewards_train/2-2": 2.279744863510132, "rewards_train/2-w": -0.7367377281188965, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.9232325553894043, "rewards_train/margins_1": 2.1417465209960938, "rewards_train/margins_2": 3.0164825916290283, "step": 124 }, { "epoch": 0.37, "logps_train/policy_1_2": -81.63484954833984, "logps_train/policy_1_l": -92.61322021484375, "logps_train/policy_1_w": -68.82612609863281, "logps_train/policy_2_2": -55.81745529174805, "logps_train/policy_2_w": -98.4971923828125, "logps_train/ref_1_2": -80.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -67.0, "logps_train/ref_2_w": -94.5, "rewards_train/1-2": -0.17325076460838318, "rewards_train/1-l": -1.2379100322723389, "rewards_train/1-w": 1.0843790769577026, "rewards_train/2-2": 1.1190357208251953, "rewards_train/2-w": -0.4061647355556488, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3222891092300415, "rewards_train/margins_1": 1.2576298415660858, "rewards_train/margins_2": 1.5252004563808441, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -173.10385131835938, "logps_train/policy_1_l": -181.17205810546875, "logps_train/policy_1_w": -142.76913452148438, "logps_train/policy_2_2": -129.59268188476562, "logps_train/policy_2_w": -186.0988006591797, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.7166359424591064, "rewards_train/1-l": -1.94767427444458, "rewards_train/1-w": 1.613710880279541, "rewards_train/2-2": 1.9684672355651855, "rewards_train/2-w": -0.7059742212295532, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.561385154724121, "rewards_train/margins_1": 2.3303468227386475, "rewards_train/margins_2": 2.6744414567947388, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -186.32598876953125, "logps_train/policy_1_l": -179.5838165283203, "logps_train/policy_1_w": -134.58523559570312, "logps_train/policy_2_2": -128.97698974609375, "logps_train/policy_2_w": -200.14675903320312, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.0505677461624146, "rewards_train/1-l": -2.4866044521331787, "rewards_train/1-w": 1.9339574575424194, "rewards_train/2-2": 2.1665596961975098, "rewards_train/2-w": -1.6648715734481812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.420561909675598, "rewards_train/margins_1": 2.984525203704834, "rewards_train/margins_2": 3.831431269645691, "step": 125 }, { "epoch": 0.37, "logps_train/policy_1_2": -262.21795654296875, "logps_train/policy_1_l": -187.0323028564453, "logps_train/policy_1_w": -148.9962921142578, "logps_train/policy_2_2": -198.8370361328125, "logps_train/policy_2_w": -203.67112731933594, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.2563657760620117, "rewards_train/1-l": -1.384089469909668, "rewards_train/1-w": 2.1708784103393555, "rewards_train/2-2": 2.345593214035034, "rewards_train/2-w": -0.5553938150405884, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.5549678802490234, "rewards_train/margins_1": 3.427244186401367, "rewards_train/margins_2": 2.9009870290756226, "step": 125 }, { "epoch": 0.38, "learning_rate": 4.7462628341327e-06, "loss": 1.2265, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -150.51806640625, "logps_train/policy_1_l": -157.75625610351562, "logps_train/policy_1_w": -142.67337036132812, "logps_train/policy_2_2": -103.78931427001953, "logps_train/policy_2_w": -198.1548614501953, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.5971185564994812, "rewards_train/1-l": -1.8610259294509888, "rewards_train/1-w": 2.0669407844543457, "rewards_train/2-2": 1.7409902811050415, "rewards_train/2-w": -0.8183183670043945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.9279667139053345, "rewards_train/margins_1": 2.664059340953827, "rewards_train/margins_2": 2.559308648109436, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -145.58799743652344, "logps_train/policy_1_l": -128.16195678710938, "logps_train/policy_1_w": -85.65386199951172, "logps_train/policy_2_2": -110.67948913574219, "logps_train/policy_2_w": -128.9739532470703, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": -0.43907299637794495, "rewards_train/1-l": -1.798592209815979, "rewards_train/1-w": 1.845160722732544, "rewards_train/2-2": 1.4463086128234863, "rewards_train/2-w": -0.4958326816558838, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.643752932548523, "rewards_train/margins_1": 2.284233719110489, "rewards_train/margins_2": 1.9421412944793701, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -153.46981811523438, "logps_train/policy_1_l": -122.4456558227539, "logps_train/policy_1_w": -103.62616729736328, "logps_train/policy_2_2": -103.25460815429688, "logps_train/policy_2_w": -156.78050231933594, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.2333095073699951, "rewards_train/1-l": -1.3601664304733276, "rewards_train/1-w": 1.9359920024871826, "rewards_train/2-2": 1.7788361310958862, "rewards_train/2-w": -1.3129128217697144, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2961584329605103, "rewards_train/margins_1": 3.1693015098571777, "rewards_train/margins_2": 3.0917489528656006, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -154.0302734375, "logps_train/policy_1_l": -154.57579040527344, "logps_train/policy_1_w": -114.25414276123047, "logps_train/policy_2_2": -105.30154418945312, "logps_train/policy_2_w": -166.00706481933594, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.7651847004890442, "rewards_train/1-l": -1.718515157699585, "rewards_train/1-w": 2.1269783973693848, "rewards_train/2-2": 1.7676481008529663, "rewards_train/2-w": -0.8958237171173096, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8454935550689697, "rewards_train/margins_1": 2.892163097858429, "rewards_train/margins_2": 2.663471817970276, "step": 126 }, { "epoch": 0.38, "logps_train/policy_1_2": -175.57080078125, "logps_train/policy_1_l": -166.90597534179688, "logps_train/policy_1_w": -94.33253479003906, "logps_train/policy_2_2": -129.62493896484375, "logps_train/policy_2_w": -144.03384399414062, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -0.600830614566803, "rewards_train/1-l": -1.411203145980835, "rewards_train/1-w": 1.4090315103530884, "rewards_train/2-2": 1.796881914138794, "rewards_train/2-w": -1.1008447408676147, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.8202346563339233, "rewards_train/margins_1": 2.0098621249198914, "rewards_train/margins_2": 2.8977266550064087, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -244.38558959960938, "logps_train/policy_1_l": -197.6014404296875, "logps_train/policy_1_w": -130.87033081054688, "logps_train/policy_2_2": -178.033447265625, "logps_train/policy_2_w": -178.03384399414062, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.5682480335235596, "rewards_train/1-l": -1.7900274991989136, "rewards_train/1-w": 2.075467348098755, "rewards_train/2-2": 2.5736069679260254, "rewards_train/2-w": -0.6506500840187073, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8654948472976685, "rewards_train/margins_1": 3.6437153816223145, "rewards_train/margins_2": 3.2242570519447327, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -114.78842163085938, "logps_train/policy_1_l": -112.1162109375, "logps_train/policy_1_w": -88.11708068847656, "logps_train/policy_2_2": -77.89541625976562, "logps_train/policy_2_w": -138.8201904296875, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -1.01790452003479, "rewards_train/1-l": -1.246387004852295, "rewards_train/1-w": 1.4492297172546387, "rewards_train/2-2": 0.9019992351531982, "rewards_train/2-w": -1.3472530841827393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.6956167221069336, "rewards_train/margins_1": 2.4671342372894287, "rewards_train/margins_2": 2.2492523193359375, "step": 127 }, { "epoch": 0.38, "logps_train/policy_1_2": -161.2577667236328, "logps_train/policy_1_l": -116.94271850585938, "logps_train/policy_1_w": -68.58966827392578, "logps_train/policy_2_2": -117.9489974975586, "logps_train/policy_2_w": -94.00944519042969, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -80.5, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -94.0, "rewards_train/1-2": -0.4914020597934723, "rewards_train/1-l": -1.0725929737091064, "rewards_train/1-w": 1.1847347021102905, "rewards_train/2-2": 1.931272268295288, "rewards_train/2-w": 0.004231661558151245, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.257327675819397, "rewards_train/margins_1": 1.6761367619037628, "rewards_train/margins_2": 1.9270406067371368, "step": 127 }, { "epoch": 0.38, "learning_rate": 4.7353118620583464e-06, "loss": 1.2294, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -202.15235900878906, "logps_train/policy_1_l": -198.28363037109375, "logps_train/policy_1_w": -125.69013214111328, "logps_train/policy_2_2": -139.30233764648438, "logps_train/policy_2_w": -190.2549285888672, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.4078134298324585, "rewards_train/1-l": -2.4087343215942383, "rewards_train/1-w": 1.8805959224700928, "rewards_train/2-2": 2.1986846923828125, "rewards_train/2-w": -1.760295033454895, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.289330244064331, "rewards_train/margins_1": 3.2884093523025513, "rewards_train/margins_2": 3.9589797258377075, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -203.75198364257812, "logps_train/policy_1_l": -176.72091674804688, "logps_train/policy_1_w": -155.81068420410156, "logps_train/policy_2_2": -158.00003051757812, "logps_train/policy_2_w": -196.49880981445312, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.9041038751602173, "rewards_train/1-l": -1.9824912548065186, "rewards_train/1-w": 1.8415882587432861, "rewards_train/2-2": 1.9802706241607666, "rewards_train/2-w": -0.36589640378952026, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8240795135498047, "rewards_train/margins_1": 2.7456921339035034, "rewards_train/margins_2": 2.346167027950287, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -159.34359741210938, "logps_train/policy_1_l": -153.55490112304688, "logps_train/policy_1_w": -123.88681030273438, "logps_train/policy_2_2": -105.48572540283203, "logps_train/policy_2_w": -178.22129821777344, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.4091527462005615, "rewards_train/1-l": -2.108504056930542, "rewards_train/1-w": 2.472109794616699, "rewards_train/2-2": 1.7568960189819336, "rewards_train/2-w": -0.63599693775177, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.580613851547241, "rewards_train/margins_1": 3.8812625408172607, "rewards_train/margins_2": 2.3928929567337036, "step": 128 }, { "epoch": 0.38, "logps_train/policy_1_2": -254.28790283203125, "logps_train/policy_1_l": -183.59136962890625, "logps_train/policy_1_w": -107.21607208251953, "logps_train/policy_2_2": -179.19094848632812, "logps_train/policy_2_w": -165.4428253173828, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.8795706033706665, "rewards_train/1-l": -2.339679718017578, "rewards_train/1-w": 2.312328815460205, "rewards_train/2-2": 2.8262176513671875, "rewards_train/2-w": -0.7801952362060547, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.652008533477783, "rewards_train/margins_1": 4.191899418830872, "rewards_train/margins_2": 3.606412887573242, "step": 128 }, { "epoch": 0.39, "logps_train/policy_1_2": -126.8488998413086, "logps_train/policy_1_l": -200.9334716796875, "logps_train/policy_1_w": -137.65130615234375, "logps_train/policy_2_2": -99.53889465332031, "logps_train/policy_2_w": -197.5718994140625, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -0.3079371750354767, "rewards_train/1-l": -1.9519892930984497, "rewards_train/1-w": 1.7378966808319092, "rewards_train/2-2": 1.317399024963379, "rewards_train/2-w": -1.249767541885376, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.689885973930359, "rewards_train/margins_1": 2.045833855867386, "rewards_train/margins_2": 2.567166566848755, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -164.354736328125, "logps_train/policy_1_l": -134.04656982421875, "logps_train/policy_1_w": -127.72846221923828, "logps_train/policy_2_2": -116.41250610351562, "logps_train/policy_2_w": -182.61007690429688, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.3878185749053955, "rewards_train/1-l": -1.6001644134521484, "rewards_train/1-w": 1.9482473134994507, "rewards_train/2-2": 1.4669528007507324, "rewards_train/2-w": -1.0850322246551514, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.548411726951599, "rewards_train/margins_1": 3.336065888404846, "rewards_train/margins_2": 2.551985025405884, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -183.57806396484375, "logps_train/policy_1_l": -163.99847412109375, "logps_train/policy_1_w": -141.64500427246094, "logps_train/policy_2_2": -137.47947692871094, "logps_train/policy_2_w": -200.72998046875, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.7523376941680908, "rewards_train/1-l": -1.8448188304901123, "rewards_train/1-w": 2.041163921356201, "rewards_train/2-2": 1.8626000881195068, "rewards_train/2-w": -1.4335459470748901, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8859827518463135, "rewards_train/margins_1": 2.793501615524292, "rewards_train/margins_2": 3.296146035194397, "step": 129 }, { "epoch": 0.39, "logps_train/policy_1_2": -206.40504455566406, "logps_train/policy_1_l": -225.810791015625, "logps_train/policy_1_w": -186.90048217773438, "logps_train/policy_2_2": -154.1544647216797, "logps_train/policy_2_w": -251.27304077148438, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -237.0, "rewards_train/1-2": -0.49460509419441223, "rewards_train/1-l": -1.5370359420776367, "rewards_train/1-w": 2.21332049369812, "rewards_train/2-2": 2.5575995445251465, "rewards_train/2-w": -1.4363865852355957, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.750356435775757, "rewards_train/margins_1": 2.7079255878925323, "rewards_train/margins_2": 3.993986129760742, "step": 129 }, { "epoch": 0.39, "learning_rate": 4.724142727486869e-06, "loss": 1.0386, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -246.99037170410156, "logps_train/policy_1_l": -229.39938354492188, "logps_train/policy_1_w": -166.9261474609375, "logps_train/policy_2_2": -179.96560668945312, "logps_train/policy_2_w": -238.01760864257812, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.5482547283172607, "rewards_train/1-l": -2.6570284366607666, "rewards_train/1-w": 2.6097288131713867, "rewards_train/2-2": 2.538595199584961, "rewards_train/2-w": -1.1306672096252441, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.266757249832153, "rewards_train/margins_1": 4.1579835414886475, "rewards_train/margins_2": 3.669262409210205, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -236.83709716796875, "logps_train/policy_1_l": -154.52685546875, "logps_train/policy_1_w": -122.02532196044922, "logps_train/policy_2_2": -157.95338439941406, "logps_train/policy_2_w": -174.30279541015625, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -2.5423030853271484, "rewards_train/1-l": -1.5104970932006836, "rewards_train/1-w": 1.6176340579986572, "rewards_train/2-2": 2.079270839691162, "rewards_train/2-w": -1.067779541015625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.128131151199341, "rewards_train/margins_1": 4.159937143325806, "rewards_train/margins_2": 3.147050380706787, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -147.65321350097656, "logps_train/policy_1_l": -156.16131591796875, "logps_train/policy_1_w": -115.6826400756836, "logps_train/policy_2_2": -115.0062255859375, "logps_train/policy_2_w": -150.6531524658203, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.5559462308883667, "rewards_train/1-l": -2.3073410987854004, "rewards_train/1-w": 1.7118136882781982, "rewards_train/2-2": 1.2239867448806763, "rewards_train/2-w": -0.28640925884246826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.019154787063599, "rewards_train/margins_1": 2.267759919166565, "rewards_train/margins_2": 1.5103960037231445, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -191.24810791015625, "logps_train/policy_1_l": -145.88441467285156, "logps_train/policy_1_w": -145.96829223632812, "logps_train/policy_2_2": -138.06704711914062, "logps_train/policy_2_w": -204.2375946044922, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.9810616970062256, "rewards_train/1-l": -1.9125628471374512, "rewards_train/1-w": 2.4684054851531982, "rewards_train/2-2": 1.3710302114486694, "rewards_train/2-w": -0.5788369178771973, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.380968332290649, "rewards_train/margins_1": 4.449467182159424, "rewards_train/margins_2": 1.9498671293258667, "step": 130 }, { "epoch": 0.39, "logps_train/policy_1_2": -182.8638153076172, "logps_train/policy_1_l": -175.75469970703125, "logps_train/policy_1_w": -163.87472534179688, "logps_train/policy_2_2": -129.3937225341797, "logps_train/policy_2_w": -234.14752197265625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -0.838481068611145, "rewards_train/1-l": -1.7174146175384521, "rewards_train/1-w": 1.8980743885040283, "rewards_train/2-2": 1.9153162240982056, "rewards_train/2-w": -2.1428771018981934, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6154890060424805, "rewards_train/margins_1": 2.7365554571151733, "rewards_train/margins_2": 4.058193325996399, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -109.42033386230469, "logps_train/policy_1_l": -101.11631774902344, "logps_train/policy_1_w": -90.36631774902344, "logps_train/policy_2_2": -78.89469909667969, "logps_train/policy_2_w": -123.33811950683594, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": -0.2949628233909607, "rewards_train/1-l": -1.1091408729553223, "rewards_train/1-w": 1.1539932489395142, "rewards_train/2-2": 1.2582838535308838, "rewards_train/2-w": -0.6717029213905334, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.2631341218948364, "rewards_train/margins_1": 1.4489560723304749, "rewards_train/margins_2": 1.9299867749214172, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -182.0708770751953, "logps_train/policy_1_l": -170.08566284179688, "logps_train/policy_1_w": -152.89097595214844, "logps_train/policy_2_2": -127.88407897949219, "logps_train/policy_2_w": -211.40118408203125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -0.6770092248916626, "rewards_train/1-l": -2.25085186958313, "rewards_train/1-w": 2.177600860595703, "rewards_train/2-2": 2.5027058124542236, "rewards_train/2-w": -1.520391583442688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.428452730178833, "rewards_train/margins_1": 2.8546100854873657, "rewards_train/margins_2": 4.023097395896912, "step": 131 }, { "epoch": 0.39, "logps_train/policy_1_2": -196.3229522705078, "logps_train/policy_1_l": -166.63668823242188, "logps_train/policy_1_w": -139.85202026367188, "logps_train/policy_2_2": -153.6468963623047, "logps_train/policy_2_w": -193.82168579101562, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -1.265889286994934, "rewards_train/1-l": -1.8045871257781982, "rewards_train/1-w": 2.3304226398468018, "rewards_train/2-2": 1.207772135734558, "rewards_train/2-w": -0.8977921009063721, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.135009765625, "rewards_train/margins_1": 3.596311926841736, "rewards_train/margins_2": 2.10556423664093, "step": 131 }, { "epoch": 0.4, "learning_rate": 4.71275652050611e-06, "loss": 1.2185, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -211.8558349609375, "logps_train/policy_1_l": -176.08212280273438, "logps_train/policy_1_w": -133.1301727294922, "logps_train/policy_2_2": -153.10806274414062, "logps_train/policy_2_w": -197.0436248779297, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.9676140546798706, "rewards_train/1-l": -1.9375102519989014, "rewards_train/1-w": 1.7500680685043335, "rewards_train/2-2": 2.0958328247070312, "rewards_train/2-w": -1.7479171752929688, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.687578320503235, "rewards_train/margins_1": 2.717682123184204, "rewards_train/margins_2": 3.84375, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -180.0786895751953, "logps_train/policy_1_l": -154.2718505859375, "logps_train/policy_1_w": -132.21775817871094, "logps_train/policy_2_2": -126.97512817382812, "logps_train/policy_2_w": -180.23609924316406, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.6221270561218262, "rewards_train/1-l": -1.3421759605407715, "rewards_train/1-w": 1.9981459379196167, "rewards_train/2-2": 1.8422822952270508, "rewards_train/2-w": -0.8509544134140015, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.340321898460388, "rewards_train/margins_1": 3.620272994041443, "rewards_train/margins_2": 2.6932367086410522, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -171.686767578125, "logps_train/policy_1_l": -173.95228576660156, "logps_train/policy_1_w": -131.07545471191406, "logps_train/policy_2_2": -113.88380432128906, "logps_train/policy_2_w": -195.55841064453125, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.910083532333374, "rewards_train/1-l": -1.757826328277588, "rewards_train/1-w": 2.0682358741760254, "rewards_train/2-2": 1.6026356220245361, "rewards_train/2-w": -1.5386526584625244, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8260622024536133, "rewards_train/margins_1": 3.9783194065093994, "rewards_train/margins_2": 3.1412882804870605, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -132.56326293945312, "logps_train/policy_1_l": -116.78987121582031, "logps_train/policy_1_w": -104.47473907470703, "logps_train/policy_2_2": -90.63163757324219, "logps_train/policy_2_w": -145.0475311279297, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -0.35632529854774475, "rewards_train/1-l": -1.2798656225204468, "rewards_train/1-w": 2.3927605152130127, "rewards_train/2-2": 1.9966020584106445, "rewards_train/2-w": -0.06237046420574188, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6726261377334595, "rewards_train/margins_1": 2.7490858137607574, "rewards_train/margins_2": 2.0589725226163864, "step": 132 }, { "epoch": 0.4, "logps_train/policy_1_2": -127.01595306396484, "logps_train/policy_1_l": -120.297607421875, "logps_train/policy_1_w": -105.92835998535156, "logps_train/policy_2_2": -91.67413330078125, "logps_train/policy_2_w": -154.28607177734375, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.8430016040802002, "rewards_train/1-l": -1.4533934593200684, "rewards_train/1-w": 1.65784752368927, "rewards_train/2-2": 1.2591490745544434, "rewards_train/2-w": -1.3614193201065063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.1112409830093384, "rewards_train/margins_1": 2.50084912776947, "rewards_train/margins_2": 2.6205683946609497, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -123.5268783569336, "logps_train/policy_1_l": -126.907958984375, "logps_train/policy_1_w": -92.00572204589844, "logps_train/policy_2_2": -88.46614074707031, "logps_train/policy_2_w": -113.03356170654297, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -107.5, "logps_train/ref_1_w": -102.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": -0.5024442672729492, "rewards_train/1-l": -1.9241456985473633, "rewards_train/1-w": 1.068324327468872, "rewards_train/2-2": 1.645329236984253, "rewards_train/2-w": -0.0873899757862091, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9924700260162354, "rewards_train/margins_1": 1.5707685947418213, "rewards_train/margins_2": 1.732719212770462, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -189.26351928710938, "logps_train/policy_1_l": -171.1552734375, "logps_train/policy_1_w": -96.539306640625, "logps_train/policy_2_2": -127.5521240234375, "logps_train/policy_2_w": -150.9257049560547, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.8677588701248169, "rewards_train/1-l": -2.3234124183654785, "rewards_train/1-w": 1.7753658294677734, "rewards_train/2-2": 2.45220947265625, "rewards_train/2-w": -1.0558509826660156, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.098778247833252, "rewards_train/margins_1": 2.6431246995925903, "rewards_train/margins_2": 3.5080604553222656, "step": 133 }, { "epoch": 0.4, "logps_train/policy_1_2": -177.73928833007812, "logps_train/policy_1_l": -173.78326416015625, "logps_train/policy_1_w": -179.67608642578125, "logps_train/policy_2_2": -124.20897674560547, "logps_train/policy_2_w": -245.18064880371094, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -0.6825234889984131, "rewards_train/1-l": -1.1543769836425781, "rewards_train/1-w": 1.9456732273101807, "rewards_train/2-2": 2.282618522644043, "rewards_train/2-w": -1.6426736116409302, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.100050210952759, "rewards_train/margins_1": 2.6281967163085938, "rewards_train/margins_2": 3.925292134284973, "step": 133 }, { "epoch": 0.4, "learning_rate": 4.7011543523898e-06, "loss": 1.2216, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -216.53346252441406, "logps_train/policy_1_l": -221.49722290039062, "logps_train/policy_1_w": -128.57388305664062, "logps_train/policy_2_2": -165.2962646484375, "logps_train/policy_2_w": -180.26223754882812, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.4073491096496582, "rewards_train/1-l": -2.3866848945617676, "rewards_train/1-w": 1.8213229179382324, "rewards_train/2-2": 1.6081173419952393, "rewards_train/2-w": -0.6770033836364746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.2080078125, "rewards_train/margins_1": 3.2286720275878906, "rewards_train/margins_2": 2.285120725631714, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -181.50074768066406, "logps_train/policy_1_l": -221.02438354492188, "logps_train/policy_1_w": -151.9240264892578, "logps_train/policy_2_2": -128.99623107910156, "logps_train/policy_2_w": -218.62673950195312, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -0.9688252210617065, "rewards_train/1-l": -2.540437936782837, "rewards_train/1-w": 2.3485031127929688, "rewards_train/2-2": 1.780260443687439, "rewards_train/2-w": -0.9241002798080444, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.888941049575806, "rewards_train/margins_1": 3.3173283338546753, "rewards_train/margins_2": 2.7043607234954834, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -237.44314575195312, "logps_train/policy_1_l": -211.7184295654297, "logps_train/policy_1_w": -152.6238250732422, "logps_train/policy_2_2": -159.89004516601562, "logps_train/policy_2_w": -212.87576293945312, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -3.207791566848755, "rewards_train/1-l": -2.282780170440674, "rewards_train/1-w": 2.0133981704711914, "rewards_train/2-2": 1.866464614868164, "rewards_train/2-w": -1.3887478113174438, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.296178340911865, "rewards_train/margins_1": 5.221189737319946, "rewards_train/margins_2": 3.255212426185608, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -120.02297973632812, "logps_train/policy_1_l": -168.69717407226562, "logps_train/policy_1_w": -97.82920837402344, "logps_train/policy_2_2": -85.02080535888672, "logps_train/policy_2_w": -149.66661071777344, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -96.5, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -0.7901885509490967, "rewards_train/1-l": -1.1113184690475464, "rewards_train/1-w": 1.914149284362793, "rewards_train/2-2": 1.13112211227417, "rewards_train/2-w": -1.0674424171447754, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.0254677534103394, "rewards_train/margins_1": 2.7043378353118896, "rewards_train/margins_2": 2.1985645294189453, "step": 134 }, { "epoch": 0.4, "logps_train/policy_1_2": -188.43035888671875, "logps_train/policy_1_l": -156.47073364257812, "logps_train/policy_1_w": -112.54503631591797, "logps_train/policy_2_2": -129.06886291503906, "logps_train/policy_2_w": -161.66143798828125, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.0549498796463013, "rewards_train/1-l": -1.2502477169036865, "rewards_train/1-w": 1.8800668716430664, "rewards_train/2-2": 2.419480800628662, "rewards_train/2-w": -0.8804022073745728, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.130314588546753, "rewards_train/margins_1": 2.9350167512893677, "rewards_train/margins_2": 3.299883008003235, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -129.61544799804688, "logps_train/policy_1_l": -147.765625, "logps_train/policy_1_w": -119.1037826538086, "logps_train/policy_2_2": -102.83110046386719, "logps_train/policy_2_w": -154.95172119140625, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -0.099044069647789, "rewards_train/1-l": -1.3273441791534424, "rewards_train/1-w": 1.7509498596191406, "rewards_train/2-2": 1.2971630096435547, "rewards_train/2-w": -0.15591448545455933, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.078294038772583, "rewards_train/margins_1": 1.8499939292669296, "rewards_train/margins_2": 1.453077495098114, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -130.64752197265625, "logps_train/policy_1_l": -128.83505249023438, "logps_train/policy_1_w": -108.78282165527344, "logps_train/policy_2_2": -96.36847686767578, "logps_train/policy_2_w": -150.74343872070312, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.5155333280563354, "rewards_train/1-l": -1.184481143951416, "rewards_train/1-w": 1.5740617513656616, "rewards_train/2-2": 1.492449402809143, "rewards_train/2-w": -0.6966098546981812, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.7585428953170776, "rewards_train/margins_1": 2.089595079421997, "rewards_train/margins_2": 2.189059257507324, "step": 135 }, { "epoch": 0.4, "logps_train/policy_1_2": -160.10142517089844, "logps_train/policy_1_l": -177.3291473388672, "logps_train/policy_1_w": -135.78273010253906, "logps_train/policy_2_2": -120.26278686523438, "logps_train/policy_2_w": -174.27816772460938, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.31756430864334106, "rewards_train/1-l": -2.282524585723877, "rewards_train/1-w": 1.5977039337158203, "rewards_train/2-2": 1.909659504890442, "rewards_train/2-w": -0.4278176426887512, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8802285194396973, "rewards_train/margins_1": 1.9152682423591614, "rewards_train/margins_2": 2.337477147579193, "step": 135 }, { "epoch": 0.41, "learning_rate": 4.689337355489092e-06, "loss": 1.2518, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -220.8584442138672, "logps_train/policy_1_l": -186.16500854492188, "logps_train/policy_1_w": -156.9225616455078, "logps_train/policy_2_2": -147.8065185546875, "logps_train/policy_2_w": -234.18106079101562, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -1.56475031375885, "rewards_train/1-l": -1.890428066253662, "rewards_train/1-w": 2.8042280673980713, "rewards_train/2-2": 2.809192180633545, "rewards_train/2-w": -1.4798250198364258, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.694656133651733, "rewards_train/margins_1": 4.368978381156921, "rewards_train/margins_2": 4.289017200469971, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -206.07643127441406, "logps_train/policy_1_l": -182.8710479736328, "logps_train/policy_1_w": -137.0746612548828, "logps_train/policy_2_2": -143.86953735351562, "logps_train/policy_2_w": -196.8168182373047, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.013991117477417, "rewards_train/1-l": -1.7272417545318604, "rewards_train/1-w": 2.1448769569396973, "rewards_train/2-2": 2.1909751892089844, "rewards_train/2-w": -1.1473075151443481, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8721187114715576, "rewards_train/margins_1": 3.1588680744171143, "rewards_train/margins_2": 3.3382827043533325, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -242.31509399414062, "logps_train/policy_1_l": -189.22816467285156, "logps_train/policy_1_w": -149.70901489257812, "logps_train/policy_2_2": -155.62681579589844, "logps_train/policy_2_w": -225.98280334472656, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -2.1932289600372314, "rewards_train/1-l": -1.5561165809631348, "rewards_train/1-w": 2.163083076477051, "rewards_train/2-2": 2.7654433250427246, "rewards_train/2-w": -2.0685925483703613, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.7191996574401855, "rewards_train/margins_1": 4.356312036514282, "rewards_train/margins_2": 4.834035873413086, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -131.32882690429688, "logps_train/policy_1_l": -129.54837036132812, "logps_train/policy_1_w": -80.92745971679688, "logps_train/policy_2_2": -85.88839721679688, "logps_train/policy_2_w": -116.80043029785156, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -0.7305395603179932, "rewards_train/1-l": -1.5910195112228394, "rewards_train/1-w": 1.6068644523620605, "rewards_train/2-2": 1.652566909790039, "rewards_train/2-w": -0.21344207227230072, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1978839635849, "rewards_train/margins_1": 2.3374040126800537, "rewards_train/margins_2": 1.8660089820623398, "step": 136 }, { "epoch": 0.41, "logps_train/policy_1_2": -208.48460388183594, "logps_train/policy_1_l": -215.29104614257812, "logps_train/policy_1_w": -157.2036895751953, "logps_train/policy_2_2": -151.54934692382812, "logps_train/policy_2_w": -237.74058532714844, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": -1.2152575254440308, "rewards_train/1-l": -2.550980567932129, "rewards_train/1-w": 2.346817970275879, "rewards_train/2-2": 1.9817850589752197, "rewards_train/2-w": -2.256870746612549, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.897798538208008, "rewards_train/margins_1": 3.5620754957199097, "rewards_train/margins_2": 4.2386558055877686, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -182.35409545898438, "logps_train/policy_1_l": -222.77792358398438, "logps_train/policy_1_w": -118.82721710205078, "logps_train/policy_2_2": -121.54495239257812, "logps_train/policy_2_w": -177.58981323242188, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.4533789157867432, "rewards_train/1-l": -1.6246685981750488, "rewards_train/1-w": 2.2918877601623535, "rewards_train/2-2": 1.6476527452468872, "rewards_train/2-w": -0.9464811086654663, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9165563583374023, "rewards_train/margins_1": 3.7452666759490967, "rewards_train/margins_2": 2.5941338539123535, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -188.31260681152344, "logps_train/policy_1_l": -204.28253173828125, "logps_train/policy_1_w": -161.61215209960938, "logps_train/policy_2_2": -153.6890869140625, "logps_train/policy_2_w": -195.4348907470703, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -0.10938611626625061, "rewards_train/1-l": -1.978350281715393, "rewards_train/1-w": 1.9963046312332153, "rewards_train/2-2": 1.8890985250473022, "rewards_train/2-w": 0.041764937341213226, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.9746549129486084, "rewards_train/margins_1": 2.105690747499466, "rewards_train/margins_2": 1.847333587706089, "step": 137 }, { "epoch": 0.41, "logps_train/policy_1_2": -230.8367156982422, "logps_train/policy_1_l": -178.55471801757812, "logps_train/policy_1_w": -151.55831909179688, "logps_train/policy_2_2": -156.4280242919922, "logps_train/policy_2_w": -219.7131805419922, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -0.4723427891731262, "rewards_train/1-l": -1.7787145376205444, "rewards_train/1-w": 2.221902847290039, "rewards_train/2-2": 3.452705144882202, "rewards_train/2-w": -1.6678016185760498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.0006173849105835, "rewards_train/margins_1": 2.6942456364631653, "rewards_train/margins_2": 5.120506763458252, "step": 137 }, { "epoch": 0.41, "learning_rate": 4.677306683122054e-06, "loss": 1.0201, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -238.3477783203125, "logps_train/policy_1_l": -254.4385528564453, "logps_train/policy_1_w": -162.98153686523438, "logps_train/policy_2_2": -184.62667846679688, "logps_train/policy_2_w": -223.81503295898438, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -233.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.0379014015197754, "rewards_train/1-l": -2.103620767593384, "rewards_train/1-w": 2.2245023250579834, "rewards_train/2-2": 1.9781529903411865, "rewards_train/2-w": -1.3775972127914429, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.328123092651367, "rewards_train/margins_1": 3.262403726577759, "rewards_train/margins_2": 3.3557502031326294, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -184.81842041015625, "logps_train/policy_1_l": -178.85986328125, "logps_train/policy_1_w": -119.69414520263672, "logps_train/policy_2_2": -134.568359375, "logps_train/policy_2_w": -161.52658081054688, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.905963659286499, "rewards_train/1-l": -1.8020025491714478, "rewards_train/1-w": 2.0997257232666016, "rewards_train/2-2": 2.14697265625, "rewards_train/2-w": -0.39992329478263855, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.9017282724380493, "rewards_train/margins_1": 3.0056893825531006, "rewards_train/margins_2": 2.5468959510326385, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -127.8213882446289, "logps_train/policy_1_l": -155.8941192626953, "logps_train/policy_1_w": -101.00041961669922, "logps_train/policy_2_2": -88.14094543457031, "logps_train/policy_2_w": -141.49368286132812, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": 0.012001723051071167, "rewards_train/1-l": -1.5858964920043945, "rewards_train/1-w": 1.690680742263794, "rewards_train/2-2": 2.3468425273895264, "rewards_train/2-w": -0.6245638132095337, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.2765772342681885, "rewards_train/margins_1": 1.6786790192127228, "rewards_train/margins_2": 2.97140634059906, "step": 138 }, { "epoch": 0.41, "logps_train/policy_1_2": -182.3643798828125, "logps_train/policy_1_l": -229.47927856445312, "logps_train/policy_1_w": -159.47897338867188, "logps_train/policy_2_2": -137.35609436035156, "logps_train/policy_2_w": -223.28402709960938, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -0.4434686303138733, "rewards_train/1-l": -2.0840601921081543, "rewards_train/1-w": 2.012943983078003, "rewards_train/2-2": 2.1737656593322754, "rewards_train/2-w": -1.2989113330841064, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.097004175186157, "rewards_train/margins_1": 2.456412613391876, "rewards_train/margins_2": 3.472676992416382, "step": 138 }, { "epoch": 0.42, "logps_train/policy_1_2": -241.891357421875, "logps_train/policy_1_l": -214.79635620117188, "logps_train/policy_1_w": -164.73092651367188, "logps_train/policy_2_2": -180.11648559570312, "logps_train/policy_2_w": -225.20277404785156, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -0.5852300524711609, "rewards_train/1-l": -2.405366897583008, "rewards_train/1-w": 3.031203031539917, "rewards_train/2-2": 3.1127657890319824, "rewards_train/2-w": -0.48551204800605774, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.436569929122925, "rewards_train/margins_1": 3.616433084011078, "rewards_train/margins_2": 3.59827783703804, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -147.62582397460938, "logps_train/policy_1_l": -137.37725830078125, "logps_train/policy_1_w": -115.92112731933594, "logps_train/policy_2_2": -101.22518920898438, "logps_train/policy_2_w": -168.30801391601562, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -0.748519778251648, "rewards_train/1-l": -1.3478827476501465, "rewards_train/1-w": 2.0514414310455322, "rewards_train/2-2": 1.5890042781829834, "rewards_train/2-w": -0.7706453800201416, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3993241786956787, "rewards_train/margins_1": 2.79996120929718, "rewards_train/margins_2": 2.359649658203125, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -167.8502655029297, "logps_train/policy_1_l": -171.13014221191406, "logps_train/policy_1_w": -111.95812225341797, "logps_train/policy_2_2": -115.93119812011719, "logps_train/policy_2_w": -174.56060791015625, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.1783863306045532, "rewards_train/1-l": -1.6239526271820068, "rewards_train/1-w": 1.7881721258163452, "rewards_train/2-2": 1.9322700500488281, "rewards_train/2-w": -1.6478571891784668, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.412124752998352, "rewards_train/margins_1": 2.9665584564208984, "rewards_train/margins_2": 3.580127239227295, "step": 139 }, { "epoch": 0.42, "logps_train/policy_1_2": -216.76104736328125, "logps_train/policy_1_l": -192.7381591796875, "logps_train/policy_1_w": -153.9956817626953, "logps_train/policy_2_2": -157.47625732421875, "logps_train/policy_2_w": -213.24099731445312, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.1319636106491089, "rewards_train/1-l": -2.2730343341827393, "rewards_train/1-w": 2.9410576820373535, "rewards_train/2-2": 2.625030040740967, "rewards_train/2-w": -0.37165865302085876, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.214092016220093, "rewards_train/margins_1": 4.073021292686462, "rewards_train/margins_2": 2.9966886937618256, "step": 139 }, { "epoch": 0.42, "learning_rate": 4.665063509461098e-06, "loss": 1.0182, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -170.23751831054688, "logps_train/policy_1_l": -175.3545684814453, "logps_train/policy_1_w": -146.0850372314453, "logps_train/policy_2_2": -128.59996032714844, "logps_train/policy_2_w": -209.34506225585938, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -0.9206260442733765, "rewards_train/1-l": -1.1348706483840942, "rewards_train/1-w": 2.0012621879577637, "rewards_train/2-2": 1.4814105033874512, "rewards_train/2-w": -0.7487644553184509, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.136132836341858, "rewards_train/margins_1": 2.92188823223114, "rewards_train/margins_2": 2.230174958705902, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -146.9374237060547, "logps_train/policy_1_l": -165.73287963867188, "logps_train/policy_1_w": -90.11705780029297, "logps_train/policy_2_2": -98.25910949707031, "logps_train/policy_2_w": -133.62457275390625, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": -1.4705002307891846, "rewards_train/1-l": -1.2619593143463135, "rewards_train/1-w": 1.1164195537567139, "rewards_train/2-2": 1.4881523847579956, "rewards_train/2-w": -1.2265198230743408, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.3783788681030273, "rewards_train/margins_1": 2.5869197845458984, "rewards_train/margins_2": 2.7146722078323364, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -200.20025634765625, "logps_train/policy_1_l": -169.75247192382812, "logps_train/policy_1_w": -122.34388732910156, "logps_train/policy_2_2": -139.95654296875, "logps_train/policy_2_w": -165.59967041015625, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.0997143983840942, "rewards_train/1-l": -1.3805203437805176, "rewards_train/1-w": 2.154674530029297, "rewards_train/2-2": 2.4393064975738525, "rewards_train/2-w": -0.41609472036361694, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5351948738098145, "rewards_train/margins_1": 3.254388928413391, "rewards_train/margins_2": 2.8554012179374695, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -154.3365020751953, "logps_train/policy_1_l": -160.85516357421875, "logps_train/policy_1_w": -96.7403564453125, "logps_train/policy_2_2": -104.56281280517578, "logps_train/policy_2_w": -145.13780212402344, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.1961504220962524, "rewards_train/1-l": -1.7444026470184326, "rewards_train/1-w": 1.9567267894744873, "rewards_train/2-2": 1.7374687194824219, "rewards_train/2-w": -0.699620246887207, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.70112943649292, "rewards_train/margins_1": 3.1528772115707397, "rewards_train/margins_2": 2.437088966369629, "step": 140 }, { "epoch": 0.42, "logps_train/policy_1_2": -164.30447387695312, "logps_train/policy_1_l": -134.1373291015625, "logps_train/policy_1_w": -107.78422546386719, "logps_train/policy_2_2": -114.31902313232422, "logps_train/policy_2_w": -149.74395751953125, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.8312283754348755, "rewards_train/1-l": -0.9223268032073975, "rewards_train/1-w": 1.7563426494598389, "rewards_train/2-2": 2.1384100914001465, "rewards_train/2-w": -0.7560376524925232, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.6786694526672363, "rewards_train/margins_1": 2.5875710248947144, "rewards_train/margins_2": 2.8944477438926697, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -222.43527221679688, "logps_train/policy_1_l": -189.22604370117188, "logps_train/policy_1_w": -114.6683349609375, "logps_train/policy_2_2": -157.5511016845703, "logps_train/policy_2_w": -164.9055633544922, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.4654016494750977, "rewards_train/1-l": -1.7830162048339844, "rewards_train/1-w": 2.29254150390625, "rewards_train/2-2": 2.257976531982422, "rewards_train/2-w": -0.27649423480033875, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.075557708740234, "rewards_train/margins_1": 3.7579431533813477, "rewards_train/margins_2": 2.5344707667827606, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -158.44351196289062, "logps_train/policy_1_l": -145.22637939453125, "logps_train/policy_1_w": -108.24197387695312, "logps_train/policy_2_2": -118.21722412109375, "logps_train/policy_2_w": -140.18386840820312, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.5947428345680237, "rewards_train/1-l": -1.0795707702636719, "rewards_train/1-w": 1.7250213623046875, "rewards_train/2-2": 1.5306206941604614, "rewards_train/2-w": 0.06833143532276154, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.8045921325683594, "rewards_train/margins_1": 2.319764196872711, "rewards_train/margins_2": 1.4622892588377, "step": 141 }, { "epoch": 0.42, "logps_train/policy_1_2": -162.31704711914062, "logps_train/policy_1_l": -162.86695861816406, "logps_train/policy_1_w": -107.58307647705078, "logps_train/policy_2_2": -113.40437316894531, "logps_train/policy_2_w": -150.46221923828125, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.8289704322814941, "rewards_train/1-l": -1.973609209060669, "rewards_train/1-w": 1.7233331203460693, "rewards_train/2-2": 1.8140547275543213, "rewards_train/2-w": -0.6313789486885071, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6969423294067383, "rewards_train/margins_1": 2.5523035526275635, "rewards_train/margins_2": 2.4454336762428284, "step": 141 }, { "epoch": 0.43, "learning_rate": 4.652609029418389e-06, "loss": 1.1903, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -185.26930236816406, "logps_train/policy_1_l": -258.3362121582031, "logps_train/policy_1_w": -155.57818603515625, "logps_train/policy_2_2": -136.39073181152344, "logps_train/policy_2_w": -215.54844665527344, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -0.2589621841907501, "rewards_train/1-l": -2.7974889278411865, "rewards_train/1-w": 3.0238232612609863, "rewards_train/2-2": 2.3820199966430664, "rewards_train/2-w": -0.13609500229358673, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.821312189102173, "rewards_train/margins_1": 3.2827854454517365, "rewards_train/margins_2": 2.518114998936653, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -173.2603759765625, "logps_train/policy_1_l": -158.99166870117188, "logps_train/policy_1_w": -150.24069213867188, "logps_train/policy_2_2": -126.67243194580078, "logps_train/policy_2_w": -194.36968994140625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.33624207973480225, "rewards_train/1-l": -1.2681620121002197, "rewards_train/1-w": 2.0463414192199707, "rewards_train/2-2": 2.3416924476623535, "rewards_train/2-w": -0.3826712369918823, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.3145034313201904, "rewards_train/margins_1": 2.382583498954773, "rewards_train/margins_2": 2.724363684654236, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -142.63827514648438, "logps_train/policy_1_l": -137.1167449951172, "logps_train/policy_1_w": -115.13451385498047, "logps_train/policy_2_2": -104.15360260009766, "logps_train/policy_2_w": -150.52182006835938, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.3067971467971802, "rewards_train/1-l": -1.1241745948791504, "rewards_train/1-w": 2.134693145751953, "rewards_train/2-2": 1.877413034439087, "rewards_train/2-w": 0.1106124222278595, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.2588677406311035, "rewards_train/margins_1": 2.4414902925491333, "rewards_train/margins_2": 1.7668006122112274, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -183.3142547607422, "logps_train/policy_1_l": -205.97479248046875, "logps_train/policy_1_w": -130.8865966796875, "logps_train/policy_2_2": -133.83729553222656, "logps_train/policy_2_w": -177.71847534179688, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.3937300443649292, "rewards_train/1-l": -2.34913969039917, "rewards_train/1-w": 2.0015759468078613, "rewards_train/2-2": 1.5897080898284912, "rewards_train/2-w": -0.45036178827285767, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.350715637207031, "rewards_train/margins_1": 3.3953059911727905, "rewards_train/margins_2": 2.040069878101349, "step": 142 }, { "epoch": 0.43, "logps_train/policy_1_2": -139.510009765625, "logps_train/policy_1_l": -170.29135131835938, "logps_train/policy_1_w": -92.98262786865234, "logps_train/policy_2_2": -108.78697204589844, "logps_train/policy_2_w": -118.93312072753906, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": -0.08283660560846329, "rewards_train/1-l": -1.4911704063415527, "rewards_train/1-w": 1.1391152143478394, "rewards_train/2-2": 1.709193468093872, "rewards_train/2-w": -0.20298054814338684, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.630285620689392, "rewards_train/margins_1": 1.2219518199563026, "rewards_train/margins_2": 1.912174016237259, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -104.54808807373047, "logps_train/policy_1_l": -87.67181396484375, "logps_train/policy_1_w": -81.97650146484375, "logps_train/policy_2_2": -72.43031311035156, "logps_train/policy_2_w": -111.10841369628906, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": -0.502464771270752, "rewards_train/1-l": -0.978998064994812, "rewards_train/1-w": 1.2932682037353516, "rewards_train/2-2": 1.2473986148834229, "rewards_train/2-w": -0.2335945963859558, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.2722662687301636, "rewards_train/margins_1": 1.7957329750061035, "rewards_train/margins_2": 1.4809932112693787, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -160.2230224609375, "logps_train/policy_1_l": -190.88583374023438, "logps_train/policy_1_w": -86.42721557617188, "logps_train/policy_2_2": -110.89698791503906, "logps_train/policy_2_w": -132.870849609375, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": -1.4748419523239136, "rewards_train/1-l": -1.4699444770812988, "rewards_train/1-w": 0.9262238144874573, "rewards_train/2-2": 1.0341298580169678, "rewards_train/2-w": -1.1335700750350952, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.396168291568756, "rewards_train/margins_1": 2.401065766811371, "rewards_train/margins_2": 2.167699933052063, "step": 143 }, { "epoch": 0.43, "logps_train/policy_1_2": -193.2041778564453, "logps_train/policy_1_l": -167.6488037109375, "logps_train/policy_1_w": -143.30181884765625, "logps_train/policy_2_2": -139.6373291015625, "logps_train/policy_2_w": -193.199951171875, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.7571361064910889, "rewards_train/1-l": -1.3877317905426025, "rewards_train/1-w": 2.5151314735412598, "rewards_train/2-2": 1.8636106252670288, "rewards_train/2-w": -0.1989971250295639, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9028632640838623, "rewards_train/margins_1": 3.2722675800323486, "rewards_train/margins_2": 2.0626077502965927, "step": 143 }, { "epoch": 0.43, "learning_rate": 4.6399444585292266e-06, "loss": 1.1365, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -188.65867614746094, "logps_train/policy_1_l": -178.55343627929688, "logps_train/policy_1_w": -130.17626953125, "logps_train/policy_2_2": -122.24364471435547, "logps_train/policy_2_w": -187.87100219726562, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.3243634700775146, "rewards_train/1-l": -1.3025845289230347, "rewards_train/1-w": 1.5657963752746582, "rewards_train/2-2": 2.166553020477295, "rewards_train/2-w": -1.253896713256836, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.868380904197693, "rewards_train/margins_1": 2.890159845352173, "rewards_train/margins_2": 3.420449733734131, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -126.19438171386719, "logps_train/policy_1_l": -94.22738647460938, "logps_train/policy_1_w": -70.2968521118164, "logps_train/policy_2_2": -96.52537536621094, "logps_train/policy_2_w": -102.43379974365234, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": -0.033109400421381, "rewards_train/1-l": -0.7452974319458008, "rewards_train/1-w": 1.4659202098846436, "rewards_train/2-2": 1.729395866394043, "rewards_train/2-w": -0.3151082396507263, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.2112176418304443, "rewards_train/margins_1": 1.4990296103060246, "rewards_train/margins_2": 2.0445041060447693, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -245.5782470703125, "logps_train/policy_1_l": -148.92236328125, "logps_train/policy_1_w": -131.7823486328125, "logps_train/policy_2_2": -172.44508361816406, "logps_train/policy_2_w": -185.65744018554688, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.653918743133545, "rewards_train/1-l": -1.049170732498169, "rewards_train/1-w": 2.32332706451416, "rewards_train/2-2": 3.1281471252441406, "rewards_train/2-w": -0.6165255308151245, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.372497797012329, "rewards_train/margins_1": 3.977245807647705, "rewards_train/margins_2": 3.744672656059265, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -227.63973999023438, "logps_train/policy_1_l": -207.19790649414062, "logps_train/policy_1_w": -160.06643676757812, "logps_train/policy_2_2": -157.20773315429688, "logps_train/policy_2_w": -227.1170654296875, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -1.3862874507904053, "rewards_train/1-l": -1.458266258239746, "rewards_train/1-w": 2.559370994567871, "rewards_train/2-2": 2.9932894706726074, "rewards_train/2-w": -1.636512279510498, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.017637252807617, "rewards_train/margins_1": 3.9456584453582764, "rewards_train/margins_2": 4.6298017501831055, "step": 144 }, { "epoch": 0.43, "logps_train/policy_1_2": -179.72218322753906, "logps_train/policy_1_l": -284.719482421875, "logps_train/policy_1_w": -136.32904052734375, "logps_train/policy_2_2": -127.56521606445312, "logps_train/policy_2_w": -195.2943115234375, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -251.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.7370620369911194, "rewards_train/1-l": -3.4331295490264893, "rewards_train/1-w": 2.584674835205078, "rewards_train/2-2": 2.251291036605835, "rewards_train/2-w": -0.23138412833213806, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.017804384231567, "rewards_train/margins_1": 3.3217368721961975, "rewards_train/margins_2": 2.482675164937973, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -156.2779541015625, "logps_train/policy_1_l": -194.65211486816406, "logps_train/policy_1_w": -117.3819351196289, "logps_train/policy_2_2": -100.2894515991211, "logps_train/policy_2_w": -177.72543334960938, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.2797495126724243, "rewards_train/1-l": -1.833571195602417, "rewards_train/1-w": 2.000087261199951, "rewards_train/2-2": 2.000791072845459, "rewards_train/2-w": -0.9750822186470032, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.833658456802368, "rewards_train/margins_1": 3.2798367738723755, "rewards_train/margins_2": 2.975873291492462, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -178.83428955078125, "logps_train/policy_1_l": -145.57864379882812, "logps_train/policy_1_w": -109.65697479248047, "logps_train/policy_2_2": -134.25123596191406, "logps_train/policy_2_w": -157.78224182128906, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -0.7060866355895996, "rewards_train/1-l": -1.4635281562805176, "rewards_train/1-w": 1.8203375339508057, "rewards_train/2-2": 1.8362045288085938, "rewards_train/2-w": -0.8402846455574036, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.2838656902313232, "rewards_train/margins_1": 2.5264241695404053, "rewards_train/margins_2": 2.6764891743659973, "step": 145 }, { "epoch": 0.43, "logps_train/policy_1_2": -192.27157592773438, "logps_train/policy_1_l": -172.13758850097656, "logps_train/policy_1_w": -168.00125122070312, "logps_train/policy_2_2": -120.69694519042969, "logps_train/policy_2_w": -242.19723510742188, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -1.6435647010803223, "rewards_train/1-l": -1.6503310203552246, "rewards_train/1-w": 2.0709686279296875, "rewards_train/2-2": 2.203547477722168, "rewards_train/2-w": -2.046090602874756, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.721299648284912, "rewards_train/margins_1": 3.7145333290100098, "rewards_train/margins_2": 4.249638080596924, "step": 145 }, { "epoch": 0.44, "learning_rate": 4.627071032833401e-06, "loss": 1.1487, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -252.9007568359375, "logps_train/policy_1_l": -181.53192138671875, "logps_train/policy_1_w": -158.15675354003906, "logps_train/policy_2_2": -176.443603515625, "logps_train/policy_2_w": -227.5728759765625, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -1.2057002782821655, "rewards_train/1-l": -2.044715642929077, "rewards_train/1-w": 2.707646131515503, "rewards_train/2-2": 2.8775148391723633, "rewards_train/2-w": -1.4689823389053345, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.75236177444458, "rewards_train/margins_1": 3.9133464097976685, "rewards_train/margins_2": 4.346497178077698, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -248.87994384765625, "logps_train/policy_1_l": -215.02896118164062, "logps_train/policy_1_w": -126.2065658569336, "logps_train/policy_2_2": -185.29464721679688, "logps_train/policy_2_w": -172.4459686279297, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.790338397026062, "rewards_train/1-l": -2.268376111984253, "rewards_train/1-w": 2.2285618782043457, "rewards_train/2-2": 2.851785182952881, "rewards_train/2-w": -0.2699875831604004, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.496937990188599, "rewards_train/margins_1": 3.0189002752304077, "rewards_train/margins_2": 3.1217727661132812, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -166.18438720703125, "logps_train/policy_1_l": -105.89013671875, "logps_train/policy_1_w": -130.26309204101562, "logps_train/policy_2_2": -118.82012176513672, "logps_train/policy_2_w": -190.39529418945312, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -0.5367973446846008, "rewards_train/1-l": -1.153466820716858, "rewards_train/1-w": 2.2129480838775635, "rewards_train/2-2": 2.159980297088623, "rewards_train/2-w": -1.2860145568847656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.3664149045944214, "rewards_train/margins_1": 2.7497454285621643, "rewards_train/margins_2": 3.4459948539733887, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -212.78353881835938, "logps_train/policy_1_l": -230.85671997070312, "logps_train/policy_1_w": -131.72348022460938, "logps_train/policy_2_2": -140.86557006835938, "logps_train/policy_2_w": -203.54466247558594, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.5376315116882324, "rewards_train/1-l": -2.5765905380249023, "rewards_train/1-w": 1.8763818740844727, "rewards_train/2-2": 2.697916030883789, "rewards_train/2-w": -1.7685275077819824, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.452972412109375, "rewards_train/margins_1": 3.414013385772705, "rewards_train/margins_2": 4.4664435386657715, "step": 146 }, { "epoch": 0.44, "logps_train/policy_1_2": -162.56890869140625, "logps_train/policy_1_l": -171.0838623046875, "logps_train/policy_1_w": -104.2076416015625, "logps_train/policy_2_2": -114.445068359375, "logps_train/policy_2_w": -139.23663330078125, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -0.7502499222755432, "rewards_train/1-l": -2.0288939476013184, "rewards_train/1-w": 1.311267375946045, "rewards_train/2-2": 1.9824470281600952, "rewards_train/2-w": -0.6385077238082886, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3401613235473633, "rewards_train/margins_1": 2.061517298221588, "rewards_train/margins_2": 2.620954751968384, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -129.10255432128906, "logps_train/policy_1_l": -103.32829284667969, "logps_train/policy_1_w": -87.21263122558594, "logps_train/policy_2_2": -80.1484375, "logps_train/policy_2_w": -151.22763061523438, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -0.9063493013381958, "rewards_train/1-l": -1.0325608253479004, "rewards_train/1-w": 2.054126739501953, "rewards_train/2-2": 1.6683597564697266, "rewards_train/2-w": -1.5878998041152954, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.0866875648498535, "rewards_train/margins_1": 2.960476040840149, "rewards_train/margins_2": 3.256259560585022, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -159.59515380859375, "logps_train/policy_1_l": -113.75238037109375, "logps_train/policy_1_w": -99.2745361328125, "logps_train/policy_2_2": -118.52123260498047, "logps_train/policy_2_w": -131.51803588867188, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -0.7309989929199219, "rewards_train/1-l": -0.945843517780304, "rewards_train/1-w": 1.4998903274536133, "rewards_train/2-2": 1.7289310693740845, "rewards_train/2-w": -0.5799281597137451, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.4457338452339172, "rewards_train/margins_1": 2.230889320373535, "rewards_train/margins_2": 2.3088592290878296, "step": 147 }, { "epoch": 0.44, "logps_train/policy_1_2": -150.10003662109375, "logps_train/policy_1_l": -128.21066284179688, "logps_train/policy_1_w": -117.88441467285156, "logps_train/policy_2_2": -99.09919738769531, "logps_train/policy_2_w": -162.0556640625, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -0.8971133828163147, "rewards_train/1-l": -1.235910177230835, "rewards_train/1-w": 1.8272804021835327, "rewards_train/2-2": 2.074455499649048, "rewards_train/2-w": -0.7397472858428955, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.0631905794143677, "rewards_train/margins_1": 2.7243937849998474, "rewards_train/margins_2": 2.8142027854919434, "step": 147 }, { "epoch": 0.44, "learning_rate": 4.613990008754565e-06, "loss": 0.9668, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -149.95968627929688, "logps_train/policy_1_l": -134.453369140625, "logps_train/policy_1_w": -111.63928985595703, "logps_train/policy_2_2": -111.96891784667969, "logps_train/policy_2_w": -149.67318725585938, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -0.5236042141914368, "rewards_train/1-l": -1.0204830169677734, "rewards_train/1-w": 1.8765984773635864, "rewards_train/2-2": 1.7557697296142578, "rewards_train/2-w": -0.26485300064086914, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.89708149433136, "rewards_train/margins_1": 2.400202691555023, "rewards_train/margins_2": 2.020622730255127, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -183.3144073486328, "logps_train/policy_1_l": -180.07080078125, "logps_train/policy_1_w": -128.93875122070312, "logps_train/policy_2_2": -117.38641357421875, "logps_train/policy_2_w": -197.8859100341797, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.9509719610214233, "rewards_train/1-l": -1.5012203454971313, "rewards_train/1-w": 1.8227256536483765, "rewards_train/2-2": 2.0912418365478516, "rewards_train/2-w": -2.2883951663970947, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.323945999145508, "rewards_train/margins_1": 3.7736976146698, "rewards_train/margins_2": 4.379637002944946, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -220.73533630371094, "logps_train/policy_1_l": -235.24191284179688, "logps_train/policy_1_w": -149.22579956054688, "logps_train/policy_2_2": -160.86007690429688, "logps_train/policy_2_w": -212.37454223632812, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": -0.9317370653152466, "rewards_train/1-l": -2.7605204582214355, "rewards_train/1-w": 2.4278111457824707, "rewards_train/2-2": 2.7413363456726074, "rewards_train/2-w": -1.146830677986145, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.188331604003906, "rewards_train/margins_1": 3.3595482110977173, "rewards_train/margins_2": 3.8881670236587524, "step": 148 }, { "epoch": 0.44, "logps_train/policy_1_2": -167.4060516357422, "logps_train/policy_1_l": -180.37185668945312, "logps_train/policy_1_w": -130.93130493164062, "logps_train/policy_2_2": -128.43145751953125, "logps_train/policy_2_w": -193.87945556640625, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -0.4585734009742737, "rewards_train/1-l": -1.7751245498657227, "rewards_train/1-w": 2.055452585220337, "rewards_train/2-2": 1.7029483318328857, "rewards_train/2-w": -1.5624072551727295, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8305771350860596, "rewards_train/margins_1": 2.5140259861946106, "rewards_train/margins_2": 3.2653555870056152, "step": 148 }, { "epoch": 0.45, "logps_train/policy_1_2": -203.66580200195312, "logps_train/policy_1_l": -216.2772674560547, "logps_train/policy_1_w": -150.98899841308594, "logps_train/policy_2_2": -145.24725341796875, "logps_train/policy_2_w": -228.4005126953125, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.613454818725586, "rewards_train/1-l": -1.9366132020950317, "rewards_train/1-w": 2.9454360008239746, "rewards_train/2-2": 1.6542778015136719, "rewards_train/2-w": -1.0947377681732178, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.882049202919006, "rewards_train/margins_1": 4.5588908195495605, "rewards_train/margins_2": 2.7490155696868896, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -173.68087768554688, "logps_train/policy_1_l": -159.34112548828125, "logps_train/policy_1_w": -152.78009033203125, "logps_train/policy_2_2": -117.82963562011719, "logps_train/policy_2_w": -209.29815673828125, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.2857651710510254, "rewards_train/1-l": -1.5497381687164307, "rewards_train/1-w": 2.254119396209717, "rewards_train/2-2": 1.7582474946975708, "rewards_train/2-w": -1.1528618335723877, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8038575649261475, "rewards_train/margins_1": 3.539884567260742, "rewards_train/margins_2": 2.9111093282699585, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -187.896728515625, "logps_train/policy_1_l": -179.5756378173828, "logps_train/policy_1_w": -122.82295227050781, "logps_train/policy_2_2": -128.152587890625, "logps_train/policy_2_w": -186.9064178466797, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.4340099096298218, "rewards_train/1-l": -1.8866649866104126, "rewards_train/1-w": 2.002714157104492, "rewards_train/2-2": 2.2093505859375, "rewards_train/2-w": -1.840836524963379, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.889379143714905, "rewards_train/margins_1": 3.436724066734314, "rewards_train/margins_2": 4.050187110900879, "step": 149 }, { "epoch": 0.45, "logps_train/policy_1_2": -230.474853515625, "logps_train/policy_1_l": -200.32752990722656, "logps_train/policy_1_w": -148.38656616210938, "logps_train/policy_2_2": -166.88433837890625, "logps_train/policy_2_w": -213.7900390625, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -0.5881105661392212, "rewards_train/1-l": -1.694178819656372, "rewards_train/1-w": 2.789468288421631, "rewards_train/2-2": 3.2853944301605225, "rewards_train/2-w": -1.0829100608825684, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.483647108078003, "rewards_train/margins_1": 3.377578854560852, "rewards_train/margins_2": 4.368304491043091, "step": 149 }, { "epoch": 0.45, "learning_rate": 4.600702662977611e-06, "loss": 0.9952, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -163.52938842773438, "logps_train/policy_1_l": -184.11451721191406, "logps_train/policy_1_w": -133.09664916992188, "logps_train/policy_2_2": -120.0005111694336, "logps_train/policy_2_w": -205.18942260742188, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.6923923492431641, "rewards_train/1-l": -1.7125744819641113, "rewards_train/1-w": 2.0081095695495605, "rewards_train/2-2": 1.8405742645263672, "rewards_train/2-w": -1.4892545938491821, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.720684051513672, "rewards_train/margins_1": 2.7005019187927246, "rewards_train/margins_2": 3.3298288583755493, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -204.97377014160156, "logps_train/policy_1_l": -162.1294403076172, "logps_train/policy_1_w": -124.91505432128906, "logps_train/policy_2_2": -143.316162109375, "logps_train/policy_2_w": -190.0011749267578, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.5481574535369873, "rewards_train/1-l": -1.762955904006958, "rewards_train/1-w": 2.371799945831299, "rewards_train/2-2": 2.018775224685669, "rewards_train/2-w": -1.4352741241455078, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.134755849838257, "rewards_train/margins_1": 3.919957399368286, "rewards_train/margins_2": 3.4540493488311768, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -232.18344116210938, "logps_train/policy_1_l": -232.36526489257812, "logps_train/policy_1_w": -146.2384490966797, "logps_train/policy_2_2": -149.55282592773438, "logps_train/policy_2_w": -223.4256134033203, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -2.325376510620117, "rewards_train/1-l": -2.752542018890381, "rewards_train/1-w": 2.287482738494873, "rewards_train/2-2": 2.3775291442871094, "rewards_train/2-w": -1.899201512336731, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.040024757385254, "rewards_train/margins_1": 4.61285924911499, "rewards_train/margins_2": 4.27673065662384, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -168.9834442138672, "logps_train/policy_1_l": -173.66094970703125, "logps_train/policy_1_w": -121.47681427001953, "logps_train/policy_2_2": -118.62804412841797, "logps_train/policy_2_w": -176.71343994140625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.5719771981239319, "rewards_train/1-l": -2.424968719482422, "rewards_train/1-w": 2.6322503089904785, "rewards_train/2-2": 2.3854379653930664, "rewards_train/2-w": -0.4933163523674011, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.0572190284729, "rewards_train/margins_1": 3.2042275071144104, "rewards_train/margins_2": 2.8787543177604675, "step": 150 }, { "epoch": 0.45, "logps_train/policy_1_2": -158.1625518798828, "logps_train/policy_1_l": -151.95567321777344, "logps_train/policy_1_w": -136.53028869628906, "logps_train/policy_2_2": -119.93824005126953, "logps_train/policy_2_w": -199.34677124023438, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -0.4371536374092102, "rewards_train/1-l": -1.9932246208190918, "rewards_train/1-w": 2.3790035247802734, "rewards_train/2-2": 1.7825431823730469, "rewards_train/2-w": -1.0651471614837646, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.372228145599365, "rewards_train/margins_1": 2.8161571621894836, "rewards_train/margins_2": 2.8476903438568115, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -208.29031372070312, "logps_train/policy_1_l": -160.43771362304688, "logps_train/policy_1_w": -155.0218505859375, "logps_train/policy_2_2": -149.93666076660156, "logps_train/policy_2_w": -225.6886444091797, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.588308334350586, "rewards_train/1-l": -1.4011211395263672, "rewards_train/1-w": 2.521838903427124, "rewards_train/2-2": 2.401353359222412, "rewards_train/2-w": -1.5173022747039795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.922960042953491, "rewards_train/margins_1": 4.11014723777771, "rewards_train/margins_2": 3.9186556339263916, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -172.65895080566406, "logps_train/policy_1_l": -186.0933837890625, "logps_train/policy_1_w": -158.94378662109375, "logps_train/policy_2_2": -125.727783203125, "logps_train/policy_2_w": -215.31451416015625, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -0.9133559465408325, "rewards_train/1-l": -1.9982788562774658, "rewards_train/1-w": 2.0528862476348877, "rewards_train/2-2": 1.9676520824432373, "rewards_train/2-w": -1.5627005100250244, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.0511651039123535, "rewards_train/margins_1": 2.96624219417572, "rewards_train/margins_2": 3.5303525924682617, "step": 151 }, { "epoch": 0.45, "logps_train/policy_1_2": -171.89712524414062, "logps_train/policy_1_l": -212.2274169921875, "logps_train/policy_1_w": -103.9393310546875, "logps_train/policy_2_2": -110.20225524902344, "logps_train/policy_2_w": -155.86300659179688, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.9787739515304565, "rewards_train/1-l": -2.4171743392944336, "rewards_train/1-w": 1.7105598449707031, "rewards_train/2-2": 1.8250877857208252, "rewards_train/2-w": -1.1864964962005615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.127734184265137, "rewards_train/margins_1": 3.6893337965011597, "rewards_train/margins_2": 3.0115842819213867, "step": 151 }, { "epoch": 0.46, "learning_rate": 4.587210292324062e-06, "loss": 0.957, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -207.25839233398438, "logps_train/policy_1_l": -212.67153930664062, "logps_train/policy_1_w": -133.82408142089844, "logps_train/policy_2_2": -158.21774291992188, "logps_train/policy_2_w": -194.780029296875, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -0.2676350474357605, "rewards_train/1-l": -2.4754538536071777, "rewards_train/1-w": 2.106459379196167, "rewards_train/2-2": 2.282717227935791, "rewards_train/2-w": -1.3752667903900146, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.581913232803345, "rewards_train/margins_1": 2.3740944266319275, "rewards_train/margins_2": 3.6579840183258057, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -198.82765197753906, "logps_train/policy_1_l": -160.8692169189453, "logps_train/policy_1_w": -131.98849487304688, "logps_train/policy_2_2": -128.06826782226562, "logps_train/policy_2_w": -196.08444213867188, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -2.142921209335327, "rewards_train/1-l": -1.4156932830810547, "rewards_train/1-w": 2.215798854827881, "rewards_train/2-2": 2.107234477996826, "rewards_train/2-w": -1.605124592781067, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6314921379089355, "rewards_train/margins_1": 4.358720064163208, "rewards_train/margins_2": 3.712359070777893, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -233.39483642578125, "logps_train/policy_1_l": -220.93043518066406, "logps_train/policy_1_w": -149.33804321289062, "logps_train/policy_2_2": -159.5056915283203, "logps_train/policy_2_w": -220.62289428710938, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.9133121967315674, "rewards_train/1-l": -2.3051533699035645, "rewards_train/1-w": 2.500424385070801, "rewards_train/2-2": 2.454167127609253, "rewards_train/2-w": -1.319663166999817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.805577754974365, "rewards_train/margins_1": 4.413736581802368, "rewards_train/margins_2": 3.77383029460907, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -216.16055297851562, "logps_train/policy_1_l": -193.56317138671875, "logps_train/policy_1_w": -133.6905975341797, "logps_train/policy_2_2": -144.540283203125, "logps_train/policy_2_w": -193.73458862304688, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.24808669090271, "rewards_train/1-l": -1.724968671798706, "rewards_train/1-w": 1.9640460014343262, "rewards_train/2-2": 2.948315143585205, "rewards_train/2-w": -1.1354695558547974, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6890146732330322, "rewards_train/margins_1": 3.212132692337036, "rewards_train/margins_2": 4.083784699440002, "step": 152 }, { "epoch": 0.46, "logps_train/policy_1_2": -178.69583129882812, "logps_train/policy_1_l": -147.64674377441406, "logps_train/policy_1_w": -159.9069366455078, "logps_train/policy_2_2": -111.13960266113281, "logps_train/policy_2_w": -221.51722717285156, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -1.851224422454834, "rewards_train/1-l": -1.2921152114868164, "rewards_train/1-w": 1.030595064163208, "rewards_train/2-2": 1.8387737274169922, "rewards_train/2-w": -2.2497682571411133, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3227102756500244, "rewards_train/margins_1": 2.881819486618042, "rewards_train/margins_2": 4.0885419845581055, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -198.79861450195312, "logps_train/policy_1_l": -217.2116241455078, "logps_train/policy_1_w": -131.1014862060547, "logps_train/policy_2_2": -135.44741821289062, "logps_train/policy_2_w": -195.99050903320312, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.516678810119629, "rewards_train/1-l": -2.433466672897339, "rewards_train/1-w": 1.6582109928131104, "rewards_train/2-2": 2.1153171062469482, "rewards_train/2-w": -1.6021759510040283, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.091677665710449, "rewards_train/margins_1": 3.1748898029327393, "rewards_train/margins_2": 3.7174930572509766, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -224.78231811523438, "logps_train/policy_1_l": -189.27105712890625, "logps_train/policy_1_w": -122.7793960571289, "logps_train/policy_2_2": -160.6732940673828, "logps_train/policy_2_w": -169.28976440429688, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.2938578128814697, "rewards_train/1-l": -1.5687077045440674, "rewards_train/1-w": 2.3865134716033936, "rewards_train/2-2": 2.343608856201172, "rewards_train/2-w": -0.17936623096466064, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.955221176147461, "rewards_train/margins_1": 3.6803712844848633, "rewards_train/margins_2": 2.5229750871658325, "step": 153 }, { "epoch": 0.46, "logps_train/policy_1_2": -175.29782104492188, "logps_train/policy_1_l": -134.50115966796875, "logps_train/policy_1_w": -97.94548034667969, "logps_train/policy_2_2": -129.59320068359375, "logps_train/policy_2_w": -139.81320190429688, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -1.0137670040130615, "rewards_train/1-l": -1.1935741901397705, "rewards_train/1-w": 1.5015456676483154, "rewards_train/2-2": 1.7865774631500244, "rewards_train/2-w": -0.8629604578018188, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.695119857788086, "rewards_train/margins_1": 2.515312671661377, "rewards_train/margins_2": 2.6495379209518433, "step": 153 }, { "epoch": 0.46, "learning_rate": 4.573514213625505e-06, "loss": 1.0522, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -214.368896484375, "logps_train/policy_1_l": -187.99708557128906, "logps_train/policy_1_w": -113.92721557617188, "logps_train/policy_2_2": -149.0433349609375, "logps_train/policy_2_w": -188.45867919921875, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.2577873468399048, "rewards_train/1-l": -2.048048257827759, "rewards_train/1-w": 2.076467990875244, "rewards_train/2-2": 2.556117057800293, "rewards_train/2-w": -2.019305944442749, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.124516248703003, "rewards_train/margins_1": 3.334255337715149, "rewards_train/margins_2": 4.575423002243042, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -160.90992736816406, "logps_train/policy_1_l": -138.88796997070312, "logps_train/policy_1_w": -128.73638916015625, "logps_train/policy_2_2": -116.34989929199219, "logps_train/policy_2_w": -166.72738647460938, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -0.64079749584198, "rewards_train/1-l": -0.8960345387458801, "rewards_train/1-w": 1.8007632493972778, "rewards_train/2-2": 2.0407915115356445, "rewards_train/2-w": -0.31336385011672974, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.696797788143158, "rewards_train/margins_1": 2.441560745239258, "rewards_train/margins_2": 2.3541553616523743, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -236.20924377441406, "logps_train/policy_1_l": -180.28964233398438, "logps_train/policy_1_w": -143.48643493652344, "logps_train/policy_2_2": -174.83847045898438, "logps_train/policy_2_w": -213.90585327148438, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.0981709957122803, "rewards_train/1-l": -1.4668543338775635, "rewards_train/1-w": 2.7434463500976562, "rewards_train/2-2": 2.342520236968994, "rewards_train/2-w": -1.1552330255508423, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.21030068397522, "rewards_train/margins_1": 3.8416173458099365, "rewards_train/margins_2": 3.4977532625198364, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -174.36453247070312, "logps_train/policy_1_l": -137.12750244140625, "logps_train/policy_1_w": -108.00357818603516, "logps_train/policy_2_2": -121.8883056640625, "logps_train/policy_2_w": -172.54931640625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.4616496562957764, "rewards_train/1-l": -1.6874573230743408, "rewards_train/1-w": 2.255305767059326, "rewards_train/2-2": 1.7158575057983398, "rewards_train/2-w": -1.3615721464157104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.942763090133667, "rewards_train/margins_1": 3.7169554233551025, "rewards_train/margins_2": 3.0774296522140503, "step": 154 }, { "epoch": 0.46, "logps_train/policy_1_2": -216.57334899902344, "logps_train/policy_1_l": -166.9011993408203, "logps_train/policy_1_w": -154.30838012695312, "logps_train/policy_2_2": -157.7230987548828, "logps_train/policy_2_w": -217.5372772216797, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.547569990158081, "rewards_train/1-l": -1.7106282711029053, "rewards_train/1-w": 2.462129831314087, "rewards_train/2-2": 1.7737836837768555, "rewards_train/2-w": -0.9060714244842529, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.172758102416992, "rewards_train/margins_1": 4.009699821472168, "rewards_train/margins_2": 2.6798551082611084, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -201.01657104492188, "logps_train/policy_1_l": -158.47726440429688, "logps_train/policy_1_w": -149.20379638671875, "logps_train/policy_2_2": -146.51968383789062, "logps_train/policy_2_w": -222.00753784179688, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -0.8918914794921875, "rewards_train/1-l": -1.5980201959609985, "rewards_train/1-w": 2.7473692893981934, "rewards_train/2-2": 2.0246803760528564, "rewards_train/2-w": -1.235520362854004, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.345389485359192, "rewards_train/margins_1": 3.639260768890381, "rewards_train/margins_2": 3.2602007389068604, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -163.0907440185547, "logps_train/policy_1_l": -150.91758728027344, "logps_train/policy_1_w": -132.5414581298828, "logps_train/policy_2_2": -119.71792602539062, "logps_train/policy_2_w": -193.996337890625, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -0.6248946189880371, "rewards_train/1-l": -1.0358998775482178, "rewards_train/1-w": 2.6126513481140137, "rewards_train/2-2": 1.9028167724609375, "rewards_train/2-w": -0.9758057594299316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6485512256622314, "rewards_train/margins_1": 3.237545967102051, "rewards_train/margins_2": 2.878622531890869, "step": 155 }, { "epoch": 0.46, "logps_train/policy_1_2": -172.0681610107422, "logps_train/policy_1_l": -155.01861572265625, "logps_train/policy_1_w": -132.7630615234375, "logps_train/policy_2_2": -122.97469329833984, "logps_train/policy_2_w": -191.03350830078125, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.0943162441253662, "rewards_train/1-l": -1.4518616199493408, "rewards_train/1-w": 2.2186152935028076, "rewards_train/2-2": 1.7501873970031738, "rewards_train/2-w": -1.106475830078125, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6704769134521484, "rewards_train/margins_1": 3.312931537628174, "rewards_train/margins_2": 2.856663227081299, "step": 155 }, { "epoch": 0.47, "learning_rate": 4.55961576359508e-06, "loss": 0.9514, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -104.45063781738281, "logps_train/policy_1_l": -130.69378662109375, "logps_train/policy_1_w": -95.66815185546875, "logps_train/policy_2_2": -71.03547668457031, "logps_train/policy_2_w": -129.6361541748047, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.027486324310302734, "rewards_train/1-l": -0.917621374130249, "rewards_train/1-w": 2.4140443801879883, "rewards_train/2-2": 1.801140308380127, "rewards_train/2-w": 0.4402904510498047, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3316657543182373, "rewards_train/margins_1": 2.441530704498291, "rewards_train/margins_2": 1.3608498573303223, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -151.21463012695312, "logps_train/policy_1_l": -147.64988708496094, "logps_train/policy_1_w": -134.82583618164062, "logps_train/policy_2_2": -104.67695617675781, "logps_train/policy_2_w": -187.15394592285156, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -0.8460742235183716, "rewards_train/1-l": -2.007469654083252, "rewards_train/1-w": 1.8797215223312378, "rewards_train/2-2": 1.8010540008544922, "rewards_train/2-w": -1.0751599073410034, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8871911764144897, "rewards_train/margins_1": 2.7257957458496094, "rewards_train/margins_2": 2.8762139081954956, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -208.70205688476562, "logps_train/policy_1_l": -177.4949951171875, "logps_train/policy_1_w": -157.0088348388672, "logps_train/policy_2_2": -139.7818603515625, "logps_train/policy_2_w": -237.11041259765625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -1.491119146347046, "rewards_train/1-l": -1.7504920959472656, "rewards_train/1-w": 2.4432568550109863, "rewards_train/2-2": 2.2627267837524414, "rewards_train/2-w": -1.7200267314910889, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.193748950958252, "rewards_train/margins_1": 3.9343760013580322, "rewards_train/margins_2": 3.9827535152435303, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -218.35052490234375, "logps_train/policy_1_l": -172.55210876464844, "logps_train/policy_1_w": -137.846435546875, "logps_train/policy_2_2": -156.3005828857422, "logps_train/policy_2_w": -203.22926330566406, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.2916935682296753, "rewards_train/1-l": -2.010471820831299, "rewards_train/1-w": 2.467308282852173, "rewards_train/2-2": 2.4422080516815186, "rewards_train/2-w": -1.0033942461013794, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.477780103683472, "rewards_train/margins_1": 3.759001851081848, "rewards_train/margins_2": 3.445602297782898, "step": 156 }, { "epoch": 0.47, "logps_train/policy_1_2": -193.51919555664062, "logps_train/policy_1_l": -252.95443725585938, "logps_train/policy_1_w": -156.46273803710938, "logps_train/policy_2_2": -142.77711486816406, "logps_train/policy_2_w": -228.75735473632812, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -0.9753563404083252, "rewards_train/1-l": -2.4245214462280273, "rewards_train/1-w": 3.195133686065674, "rewards_train/2-2": 1.9968969821929932, "rewards_train/2-w": -0.9382343888282776, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.619655132293701, "rewards_train/margins_1": 4.170490026473999, "rewards_train/margins_2": 2.9351313710212708, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -128.53268432617188, "logps_train/policy_1_l": -136.675048828125, "logps_train/policy_1_w": -104.25784301757812, "logps_train/policy_2_2": -94.43048095703125, "logps_train/policy_2_w": -140.96044921875, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -0.6167462468147278, "rewards_train/1-l": -1.0876212120056152, "rewards_train/1-w": 1.6484341621398926, "rewards_train/2-2": 1.4380073547363281, "rewards_train/2-w": -0.1946779191493988, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.736055374145508, "rewards_train/margins_1": 2.2651804089546204, "rewards_train/margins_2": 1.632685273885727, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -169.02862548828125, "logps_train/policy_1_l": -114.4771957397461, "logps_train/policy_1_w": -116.81800842285156, "logps_train/policy_2_2": -119.54447937011719, "logps_train/policy_2_w": -166.4113311767578, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.8997385501861572, "rewards_train/1-l": -1.203383207321167, "rewards_train/1-w": 1.9574564695358276, "rewards_train/2-2": 1.6822706460952759, "rewards_train/2-w": -0.6842963695526123, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1608396768569946, "rewards_train/margins_1": 2.857195019721985, "rewards_train/margins_2": 2.366567015647888, "step": 157 }, { "epoch": 0.47, "logps_train/policy_1_2": -148.5068817138672, "logps_train/policy_1_l": -95.75468444824219, "logps_train/policy_1_w": -79.0654296875, "logps_train/policy_2_2": -102.9210205078125, "logps_train/policy_2_w": -124.36307525634766, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": -0.9561564922332764, "rewards_train/1-l": -1.1545950174331665, "rewards_train/1-w": 1.69384765625, "rewards_train/2-2": 1.6836793422698975, "rewards_train/2-w": -0.8602330684661865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.8484426736831665, "rewards_train/margins_1": 2.6500041484832764, "rewards_train/margins_2": 2.543912410736084, "step": 157 }, { "epoch": 0.47, "learning_rate": 4.545516298697006e-06, "loss": 1.119, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -182.55291748046875, "logps_train/policy_1_l": -186.37521362304688, "logps_train/policy_1_w": -135.440673828125, "logps_train/policy_2_2": -138.69107055664062, "logps_train/policy_2_w": -186.38595581054688, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -0.6455263495445251, "rewards_train/1-l": -1.2637903690338135, "rewards_train/1-w": 2.848900318145752, "rewards_train/2-2": 1.765267252922058, "rewards_train/2-w": 0.0711696594953537, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.112690687179565, "rewards_train/margins_1": 3.494426667690277, "rewards_train/margins_2": 1.6940975934267044, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -218.04933166503906, "logps_train/policy_1_l": -193.35784912109375, "logps_train/policy_1_w": -175.71273803710938, "logps_train/policy_2_2": -163.21551513671875, "logps_train/policy_2_w": -230.23175048828125, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": -0.8611836433410645, "rewards_train/1-l": -2.182465076446533, "rewards_train/1-w": 3.163883686065674, "rewards_train/2-2": 2.420635938644409, "rewards_train/2-w": -0.22083213925361633, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.346348762512207, "rewards_train/margins_1": 4.025067329406738, "rewards_train/margins_2": 2.6414680778980255, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -165.0201416015625, "logps_train/policy_1_l": -153.16949462890625, "logps_train/policy_1_w": -106.87191009521484, "logps_train/policy_2_2": -120.27239990234375, "logps_train/policy_2_w": -150.80859375, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.579357385635376, "rewards_train/1-l": -1.6078665256500244, "rewards_train/1-w": 1.9935706853866577, "rewards_train/2-2": 1.880571961402893, "rewards_train/2-w": -0.267968088388443, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.601437211036682, "rewards_train/margins_1": 2.5729280710220337, "rewards_train/margins_2": 2.148540049791336, "step": 158 }, { "epoch": 0.47, "logps_train/policy_1_2": -169.33108520507812, "logps_train/policy_1_l": -160.20773315429688, "logps_train/policy_1_w": -109.86807250976562, "logps_train/policy_2_2": -133.58599853515625, "logps_train/policy_2_w": -148.57302856445312, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": 0.20751579105854034, "rewards_train/1-l": -1.5169646739959717, "rewards_train/1-w": 1.8179786205291748, "rewards_train/2-2": 2.131244421005249, "rewards_train/2-w": -0.31452882289886475, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.5625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3349432945251465, "rewards_train/margins_1": 1.6104628294706345, "rewards_train/margins_2": 2.4457732439041138, "step": 158 }, { "epoch": 0.48, "logps_train/policy_1_2": -213.93179321289062, "logps_train/policy_1_l": -170.7122802734375, "logps_train/policy_1_w": -114.09803771972656, "logps_train/policy_2_2": -144.6815185546875, "logps_train/policy_2_w": -182.47242736816406, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.6369290351867676, "rewards_train/1-l": -1.304895043373108, "rewards_train/1-w": 1.9300403594970703, "rewards_train/2-2": 2.1357531547546387, "rewards_train/2-w": -1.3370864391326904, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.2349354028701782, "rewards_train/margins_1": 3.566969394683838, "rewards_train/margins_2": 3.472839593887329, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -158.1676483154297, "logps_train/policy_1_l": -171.52439880371094, "logps_train/policy_1_w": -94.2636947631836, "logps_train/policy_2_2": -99.41903686523438, "logps_train/policy_2_w": -141.99740600585938, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -1.2801436185836792, "rewards_train/1-l": -1.6283793449401855, "rewards_train/1-w": 1.5759745836257935, "rewards_train/2-2": 1.9991121292114258, "rewards_train/2-w": -0.8739586472511292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.204353928565979, "rewards_train/margins_1": 2.8561182022094727, "rewards_train/margins_2": 2.873070776462555, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -230.82284545898438, "logps_train/policy_1_l": -162.15907287597656, "logps_train/policy_1_w": -100.70095825195312, "logps_train/policy_2_2": -173.12869262695312, "logps_train/policy_2_w": -147.69644165039062, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.9783790111541748, "rewards_train/1-l": -2.2780168056488037, "rewards_train/1-w": 1.8302950859069824, "rewards_train/2-2": 2.249509334564209, "rewards_train/2-w": -0.7438637018203735, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.108311891555786, "rewards_train/margins_1": 2.8086740970611572, "rewards_train/margins_2": 2.9933730363845825, "step": 159 }, { "epoch": 0.48, "logps_train/policy_1_2": -112.53387451171875, "logps_train/policy_1_l": -185.45510864257812, "logps_train/policy_1_w": -98.21028900146484, "logps_train/policy_2_2": -87.40990447998047, "logps_train/policy_2_w": -130.40853881835938, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": -0.3871765732765198, "rewards_train/1-l": -1.9910187721252441, "rewards_train/1-w": 1.2619062662124634, "rewards_train/2-2": 0.9606208801269531, "rewards_train/2-w": -0.24478407204151154, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.2529250383377075, "rewards_train/margins_1": 1.6490828394889832, "rewards_train/margins_2": 1.2054049521684647, "step": 159 }, { "epoch": 0.48, "learning_rate": 4.531217195014204e-06, "loss": 1.1295, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -251.05007934570312, "logps_train/policy_1_l": -180.7425079345703, "logps_train/policy_1_w": -158.6890869140625, "logps_train/policy_2_2": -173.696044921875, "logps_train/policy_2_w": -225.18670654296875, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.3667259216308594, "rewards_train/1-l": -1.3468093872070312, "rewards_train/1-w": 2.3154659271240234, "rewards_train/2-2": 3.5475828647613525, "rewards_train/2-w": -1.7303886413574219, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6622753143310547, "rewards_train/margins_1": 3.682191848754883, "rewards_train/margins_2": 5.277971506118774, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -247.67483520507812, "logps_train/policy_1_l": -174.0596923828125, "logps_train/policy_1_w": -134.58212280273438, "logps_train/policy_2_2": -185.74517822265625, "logps_train/policy_2_w": -199.1273193359375, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.927249550819397, "rewards_train/1-l": -1.567394495010376, "rewards_train/1-w": 2.5960841178894043, "rewards_train/2-2": 2.6469664573669434, "rewards_train/2-w": -1.1439815759658813, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.16347861289978, "rewards_train/margins_1": 3.5233336687088013, "rewards_train/margins_2": 3.7909480333328247, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -180.3792266845703, "logps_train/policy_1_l": -214.39248657226562, "logps_train/policy_1_w": -185.01214599609375, "logps_train/policy_2_2": -121.50546264648438, "logps_train/policy_2_w": -273.49761962890625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": -1.0683914422988892, "rewards_train/1-l": -2.465029716491699, "rewards_train/1-w": 3.207379102706909, "rewards_train/2-2": 2.2047274112701416, "rewards_train/2-w": -2.2560107707977295, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.672408819198608, "rewards_train/margins_1": 4.275770545005798, "rewards_train/margins_2": 4.460738182067871, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -237.70652770996094, "logps_train/policy_1_l": -196.10601806640625, "logps_train/policy_1_w": -145.46194458007812, "logps_train/policy_2_2": -164.818359375, "logps_train/policy_2_w": -208.8483123779297, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.371628761291504, "rewards_train/1-l": -2.240680694580078, "rewards_train/1-w": 2.2381796836853027, "rewards_train/2-2": 3.0291028022766113, "rewards_train/2-w": -1.2481117248535156, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.478860378265381, "rewards_train/margins_1": 3.6098084449768066, "rewards_train/margins_2": 4.277214527130127, "step": 160 }, { "epoch": 0.48, "logps_train/policy_1_2": -243.1158447265625, "logps_train/policy_1_l": -185.46998596191406, "logps_train/policy_1_w": -178.1396942138672, "logps_train/policy_2_2": -156.78701782226562, "logps_train/policy_2_w": -259.9268798828125, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": -2.571349620819092, "rewards_train/1-l": -1.8685194253921509, "rewards_train/1-w": 3.0325160026550293, "rewards_train/2-2": 2.272860050201416, "rewards_train/2-w": -2.0270628929138184, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.90103542804718, "rewards_train/margins_1": 5.603865623474121, "rewards_train/margins_2": 4.299922943115234, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -217.2509765625, "logps_train/policy_1_l": -296.85992431640625, "logps_train/policy_1_w": -183.39382934570312, "logps_train/policy_2_2": -148.54127502441406, "logps_train/policy_2_w": -247.75869750976562, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -266.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": -0.8403303623199463, "rewards_train/1-l": -3.1471242904663086, "rewards_train/1-w": 2.1992902755737305, "rewards_train/2-2": 3.0407938957214355, "rewards_train/2-w": -1.3395402431488037, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.346414566040039, "rewards_train/margins_1": 3.0396206378936768, "rewards_train/margins_2": 4.380334138870239, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -198.06861877441406, "logps_train/policy_1_l": -128.4260711669922, "logps_train/policy_1_w": -136.48065185546875, "logps_train/policy_2_2": -149.1338348388672, "logps_train/policy_2_w": -181.91030883789062, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -0.5560808777809143, "rewards_train/1-l": -0.7898720502853394, "rewards_train/1-w": 2.0350399017333984, "rewards_train/2-2": 2.278120756149292, "rewards_train/2-w": -0.6664223670959473, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.824911952018738, "rewards_train/margins_1": 2.5911207795143127, "rewards_train/margins_2": 2.9445431232452393, "step": 161 }, { "epoch": 0.48, "logps_train/policy_1_2": -187.88845825195312, "logps_train/policy_1_l": -143.10086059570312, "logps_train/policy_1_w": -115.52730560302734, "logps_train/policy_2_2": -118.22613525390625, "logps_train/policy_2_w": -186.27410888671875, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.9325965642929077, "rewards_train/1-l": -1.4565696716308594, "rewards_train/1-w": 1.8468791246414185, "rewards_train/2-2": 2.0715272426605225, "rewards_train/2-w": -2.050846576690674, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.303448796272278, "rewards_train/margins_1": 3.779475688934326, "rewards_train/margins_2": 4.122373819351196, "step": 161 }, { "epoch": 0.49, "learning_rate": 4.516719848113983e-06, "loss": 0.9332, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -273.09375, "logps_train/policy_1_l": -160.59259033203125, "logps_train/policy_1_w": -161.98309326171875, "logps_train/policy_2_2": -204.64962768554688, "logps_train/policy_2_w": -222.08282470703125, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -233.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -1.0750010013580322, "rewards_train/1-l": -1.5153125524520874, "rewards_train/1-w": 2.5391898155212402, "rewards_train/2-2": 2.867849349975586, "rewards_train/2-w": -0.9957811832427979, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.054502367973328, "rewards_train/margins_1": 3.6141908168792725, "rewards_train/margins_2": 3.863630533218384, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -172.70272827148438, "logps_train/policy_1_l": -156.93670654296875, "logps_train/policy_1_w": -116.2547378540039, "logps_train/policy_2_2": -133.83901977539062, "logps_train/policy_2_w": -168.67691040039062, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -0.3298443853855133, "rewards_train/1-l": -1.6726746559143066, "rewards_train/1-w": 2.006166934967041, "rewards_train/2-2": 2.065316677093506, "rewards_train/2-w": -1.255190134048462, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.6788415908813477, "rewards_train/margins_1": 2.3360113203525543, "rewards_train/margins_2": 3.3205068111419678, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -140.24977111816406, "logps_train/policy_1_l": -159.3897705078125, "logps_train/policy_1_w": -115.68233489990234, "logps_train/policy_2_2": -96.84999084472656, "logps_train/policy_2_w": -172.4263916015625, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.40173473954200745, "rewards_train/1-l": -1.8995240926742554, "rewards_train/1-w": 2.418778419494629, "rewards_train/2-2": 1.780430555343628, "rewards_train/2-w": -0.8087531328201294, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.318302512168884, "rewards_train/margins_1": 2.8205131590366364, "rewards_train/margins_2": 2.5891836881637573, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -143.99949645996094, "logps_train/policy_1_l": -163.587890625, "logps_train/policy_1_w": -136.8798065185547, "logps_train/policy_2_2": -106.56629943847656, "logps_train/policy_2_w": -181.86244201660156, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.4534648656845093, "rewards_train/1-l": -1.7064461708068848, "rewards_train/1-w": 1.8979572057724, "rewards_train/2-2": 1.3976666927337646, "rewards_train/2-w": -0.8291152715682983, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.6044033765792847, "rewards_train/margins_1": 2.351422071456909, "rewards_train/margins_2": 2.226781964302063, "step": 162 }, { "epoch": 0.49, "logps_train/policy_1_2": -168.7281951904297, "logps_train/policy_1_l": -139.18768310546875, "logps_train/policy_1_w": -132.4075164794922, "logps_train/policy_2_2": -116.86669921875, "logps_train/policy_2_w": -178.3385009765625, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -0.683171272277832, "rewards_train/1-l": -0.8116392493247986, "rewards_train/1-w": 2.210029125213623, "rewards_train/2-2": 2.371825933456421, "rewards_train/2-w": -0.7182244658470154, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0216683745384216, "rewards_train/margins_1": 2.893200397491455, "rewards_train/margins_2": 3.0900503993034363, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -158.3529510498047, "logps_train/policy_1_l": -199.3907470703125, "logps_train/policy_1_w": -144.07510375976562, "logps_train/policy_2_2": -112.47028350830078, "logps_train/policy_2_w": -209.60928344726562, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -0.6590251922607422, "rewards_train/1-l": -2.041863441467285, "rewards_train/1-w": 2.6034276485443115, "rewards_train/2-2": 1.9438894987106323, "rewards_train/2-w": -1.0374904870986938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.645291090011597, "rewards_train/margins_1": 3.2624528408050537, "rewards_train/margins_2": 2.981379985809326, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -201.40457153320312, "logps_train/policy_1_l": -195.07675170898438, "logps_train/policy_1_w": -163.31793212890625, "logps_train/policy_2_2": -155.1157684326172, "logps_train/policy_2_w": -217.4965057373047, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -0.22007180750370026, "rewards_train/1-l": -2.130232810974121, "rewards_train/1-w": 3.054145097732544, "rewards_train/2-2": 2.4228715896606445, "rewards_train/2-w": -0.06293237209320068, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.184377908706665, "rewards_train/margins_1": 3.274216905236244, "rewards_train/margins_2": 2.485803961753845, "step": 163 }, { "epoch": 0.49, "logps_train/policy_1_2": -198.31082153320312, "logps_train/policy_1_l": -198.46641540527344, "logps_train/policy_1_w": -124.2972640991211, "logps_train/policy_2_2": -136.2404022216797, "logps_train/policy_2_w": -181.41058349609375, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.3888944387435913, "rewards_train/1-l": -1.4985947608947754, "rewards_train/1-w": 1.6931865215301514, "rewards_train/2-2": 2.1226391792297363, "rewards_train/2-w": -1.2973072528839111, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.1917812824249268, "rewards_train/margins_1": 3.0820809602737427, "rewards_train/margins_2": 3.4199464321136475, "step": 163 }, { "epoch": 0.49, "learning_rate": 4.502025672911845e-06, "loss": 0.9802, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -225.968017578125, "logps_train/policy_1_l": -212.14903259277344, "logps_train/policy_1_w": -177.55032348632812, "logps_train/policy_2_2": -166.24392700195312, "logps_train/policy_2_w": -246.5778045654297, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -201.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": 0.009839046746492386, "rewards_train/1-l": -2.5389280319213867, "rewards_train/1-w": 2.3613739013671875, "rewards_train/2-2": 3.1897432804107666, "rewards_train/2-w": -1.5552418231964111, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.900301933288574, "rewards_train/margins_1": 2.351534854620695, "rewards_train/margins_2": 4.744985103607178, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -201.8682403564453, "logps_train/policy_1_l": -215.33712768554688, "logps_train/policy_1_w": -145.19229125976562, "logps_train/policy_2_2": -139.35275268554688, "logps_train/policy_2_w": -207.76119995117188, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.3230547904968262, "rewards_train/1-l": -2.3903536796569824, "rewards_train/1-w": 2.493464946746826, "rewards_train/2-2": 2.258646011352539, "rewards_train/2-w": -1.060103178024292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.883818626403809, "rewards_train/margins_1": 3.8165197372436523, "rewards_train/margins_2": 3.318749189376831, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -163.61904907226562, "logps_train/policy_1_l": -146.98486328125, "logps_train/policy_1_w": -151.13775634765625, "logps_train/policy_2_2": -128.82647705078125, "logps_train/policy_2_w": -202.0455322265625, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": 0.2615317702293396, "rewards_train/1-l": -1.4745616912841797, "rewards_train/1-w": 1.7058547735214233, "rewards_train/2-2": 2.402118444442749, "rewards_train/2-w": -1.0059202909469604, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.180416464805603, "rewards_train/margins_1": 1.4443230032920837, "rewards_train/margins_2": 3.4080387353897095, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -180.54042053222656, "logps_train/policy_1_l": -181.4987030029297, "logps_train/policy_1_w": -115.13563537597656, "logps_train/policy_2_2": -122.81044006347656, "logps_train/policy_2_w": -178.8148193359375, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -1.1503318548202515, "rewards_train/1-l": -1.9539721012115479, "rewards_train/1-w": 1.9748151302337646, "rewards_train/2-2": 2.2019152641296387, "rewards_train/2-w": -1.4727908372879028, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9287872314453125, "rewards_train/margins_1": 3.125146985054016, "rewards_train/margins_2": 3.6747061014175415, "step": 164 }, { "epoch": 0.49, "logps_train/policy_1_2": -206.82806396484375, "logps_train/policy_1_l": -197.03465270996094, "logps_train/policy_1_w": -109.04991149902344, "logps_train/policy_2_2": -151.17166137695312, "logps_train/policy_2_w": -159.01959228515625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -0.5281186103820801, "rewards_train/1-l": -2.5925283432006836, "rewards_train/1-w": 1.8340710401535034, "rewards_train/2-2": 2.6172101497650146, "rewards_train/2-w": -0.7816465497016907, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.426599383354187, "rewards_train/margins_1": 2.3621896505355835, "rewards_train/margins_2": 3.3988566994667053, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -192.8827362060547, "logps_train/policy_1_l": -156.89263916015625, "logps_train/policy_1_w": -144.68133544921875, "logps_train/policy_2_2": -134.1680908203125, "logps_train/policy_2_w": -194.49819946289062, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.2050704956054688, "rewards_train/1-l": -1.4202696084976196, "rewards_train/1-w": 2.053741693496704, "rewards_train/2-2": 2.07928466796875, "rewards_train/2-w": -0.8795062303543091, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.4740113019943237, "rewards_train/margins_1": 3.258812189102173, "rewards_train/margins_2": 2.958790898323059, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -204.793212890625, "logps_train/policy_1_l": -213.9516143798828, "logps_train/policy_1_w": -140.50559997558594, "logps_train/policy_2_2": -147.28598022460938, "logps_train/policy_2_w": -195.141357421875, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.9504159688949585, "rewards_train/1-l": -1.9732860326766968, "rewards_train/1-w": 2.903346300125122, "rewards_train/2-2": 2.755776882171631, "rewards_train/2-w": -0.38991808891296387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.876632332801819, "rewards_train/margins_1": 3.8537622690200806, "rewards_train/margins_2": 3.1456949710845947, "step": 165 }, { "epoch": 0.49, "logps_train/policy_1_2": -201.33790588378906, "logps_train/policy_1_l": -159.36859130859375, "logps_train/policy_1_w": -135.74365234375, "logps_train/policy_2_2": -152.29690551757812, "logps_train/policy_2_w": -180.4331512451172, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -0.518165111541748, "rewards_train/1-l": -0.8612732291221619, "rewards_train/1-w": 2.0475101470947266, "rewards_train/2-2": 2.4906222820281982, "rewards_train/2-w": -0.3526896834373474, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.9087833762168884, "rewards_train/margins_1": 2.5656752586364746, "rewards_train/margins_2": 2.8433119654655457, "step": 165 }, { "epoch": 0.5, "learning_rate": 4.487136103533383e-06, "loss": 0.9779, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -148.24717712402344, "logps_train/policy_1_l": -191.6892852783203, "logps_train/policy_1_w": -150.08053588867188, "logps_train/policy_2_2": -101.0845947265625, "logps_train/policy_2_w": -216.94699096679688, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -0.5411242842674255, "rewards_train/1-l": -1.6697092056274414, "rewards_train/1-w": 2.7880406379699707, "rewards_train/2-2": 1.8987667560577393, "rewards_train/2-w": -1.0771209001541138, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.457749843597412, "rewards_train/margins_1": 3.3291649222373962, "rewards_train/margins_2": 2.975887656211853, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -170.21380615234375, "logps_train/policy_1_l": -97.38224792480469, "logps_train/policy_1_w": -116.92132568359375, "logps_train/policy_2_2": -112.37190246582031, "logps_train/policy_2_w": -182.1492156982422, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -2.0280208587646484, "rewards_train/1-l": -0.7541670799255371, "rewards_train/1-w": 1.9816961288452148, "rewards_train/2-2": 1.5262866020202637, "rewards_train/2-w": -1.6718552112579346, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.735863208770752, "rewards_train/margins_1": 4.009716987609863, "rewards_train/margins_2": 3.1981418132781982, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -212.0127410888672, "logps_train/policy_1_l": -183.11627197265625, "logps_train/policy_1_w": -121.1270751953125, "logps_train/policy_2_2": -149.4918670654297, "logps_train/policy_2_w": -178.82435607910156, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.9381884932518005, "rewards_train/1-l": -2.1221494674682617, "rewards_train/1-w": 2.014246702194214, "rewards_train/2-2": 2.5681967735290527, "rewards_train/2-w": -1.495276689529419, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.136396169662476, "rewards_train/margins_1": 2.9524351954460144, "rewards_train/margins_2": 4.063473463058472, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -150.46835327148438, "logps_train/policy_1_l": -145.2542724609375, "logps_train/policy_1_w": -131.53721618652344, "logps_train/policy_2_2": -112.63644409179688, "logps_train/policy_2_w": -177.04696655273438, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.26685619354248047, "rewards_train/1-l": -0.8986206650733948, "rewards_train/1-w": 2.2385637760162354, "rewards_train/2-2": 1.8099398612976074, "rewards_train/2-w": -0.391806960105896, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.13718444108963, "rewards_train/margins_1": 2.505419969558716, "rewards_train/margins_2": 2.2017468214035034, "step": 166 }, { "epoch": 0.5, "logps_train/policy_1_2": -162.42919921875, "logps_train/policy_1_l": -134.06826782226562, "logps_train/policy_1_w": -104.05239868164062, "logps_train/policy_2_2": -110.72084045410156, "logps_train/policy_2_w": -145.1326446533203, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.856738567352295, "rewards_train/1-l": -1.358877182006836, "rewards_train/1-w": 1.5502290725708008, "rewards_train/2-2": 1.2671747207641602, "rewards_train/2-w": -0.7817216515541077, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.9091062545776367, "rewards_train/margins_1": 3.4069676399230957, "rewards_train/margins_2": 2.048896372318268, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -135.56741333007812, "logps_train/policy_1_l": -122.00975036621094, "logps_train/policy_1_w": -113.54766845703125, "logps_train/policy_2_2": -94.06449890136719, "logps_train/policy_2_w": -172.04112243652344, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.8580850958824158, "rewards_train/1-l": -1.0848125219345093, "rewards_train/1-w": 1.8376160860061646, "rewards_train/2-2": 1.6954541206359863, "rewards_train/2-w": -1.3814563751220703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.922428607940674, "rewards_train/margins_1": 2.6957011818885803, "rewards_train/margins_2": 3.0769104957580566, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -219.495361328125, "logps_train/policy_1_l": -221.56658935546875, "logps_train/policy_1_w": -174.61434936523438, "logps_train/policy_2_2": -152.98541259765625, "logps_train/policy_2_w": -244.4697265625, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -1.4510998725891113, "rewards_train/1-l": -1.2841975688934326, "rewards_train/1-w": 2.3522377014160156, "rewards_train/2-2": 2.26552152633667, "rewards_train/2-w": -1.5125977993011475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6364352703094482, "rewards_train/margins_1": 3.803337574005127, "rewards_train/margins_2": 3.7781193256378174, "step": 167 }, { "epoch": 0.5, "logps_train/policy_1_2": -163.4165496826172, "logps_train/policy_1_l": -150.90533447265625, "logps_train/policy_1_w": -102.39069366455078, "logps_train/policy_2_2": -113.63286590576172, "logps_train/policy_2_w": -145.26585388183594, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -0.9760302901268005, "rewards_train/1-l": -1.9547905921936035, "rewards_train/1-w": 1.7788991928100586, "rewards_train/2-2": 1.9449163675308228, "rewards_train/2-w": -0.8008045554161072, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.733689785003662, "rewards_train/margins_1": 2.754929482936859, "rewards_train/margins_2": 2.74572092294693, "step": 167 }, { "epoch": 0.5, "learning_rate": 4.472052593174323e-06, "loss": 1.1293, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -169.96661376953125, "logps_train/policy_1_l": -191.6166534423828, "logps_train/policy_1_w": -147.89096069335938, "logps_train/policy_2_2": -131.64727783203125, "logps_train/policy_2_w": -199.1464385986328, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.5609187483787537, "rewards_train/1-l": -1.9804158210754395, "rewards_train/1-w": 2.407778739929199, "rewards_train/2-2": 1.5052926540374756, "rewards_train/2-w": -0.5830036401748657, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.388194561004639, "rewards_train/margins_1": 2.968697488307953, "rewards_train/margins_2": 2.0882962942123413, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -221.58056640625, "logps_train/policy_1_l": -176.25830078125, "logps_train/policy_1_w": -140.98468017578125, "logps_train/policy_2_2": -164.76744079589844, "logps_train/policy_2_w": -195.57974243164062, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.6072757244110107, "rewards_train/1-l": -2.1751480102539062, "rewards_train/1-w": 2.9275083541870117, "rewards_train/2-2": 2.627943992614746, "rewards_train/2-w": -0.24039651453495026, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.102656364440918, "rewards_train/margins_1": 3.5347840785980225, "rewards_train/margins_2": 2.8683405071496964, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -156.97930908203125, "logps_train/policy_1_l": -171.73712158203125, "logps_train/policy_1_w": -122.20983123779297, "logps_train/policy_2_2": -108.71065521240234, "logps_train/policy_2_w": -182.3760528564453, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -0.8889452219009399, "rewards_train/1-l": -1.9304509162902832, "rewards_train/1-w": 1.623792052268982, "rewards_train/2-2": 1.9480749368667603, "rewards_train/2-w": -1.4613354206085205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.554242968559265, "rewards_train/margins_1": 2.512737274169922, "rewards_train/margins_2": 3.4094103574752808, "step": 168 }, { "epoch": 0.5, "logps_train/policy_1_2": -91.09463500976562, "logps_train/policy_1_l": -108.72793579101562, "logps_train/policy_1_w": -63.537628173828125, "logps_train/policy_2_2": -59.52374267578125, "logps_train/policy_2_w": -106.6766357421875, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -72.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": -0.5741789937019348, "rewards_train/1-l": -1.1361174583435059, "rewards_train/1-w": 1.0351592302322388, "rewards_train/2-2": 1.2434780597686768, "rewards_train/2-w": -1.3685400485992432, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.1712766885757446, "rewards_train/margins_1": 1.6093382239341736, "rewards_train/margins_2": 2.61201810836792, "step": 168 }, { "epoch": 0.51, "logps_train/policy_1_2": -190.41395568847656, "logps_train/policy_1_l": -149.89358520507812, "logps_train/policy_1_w": -148.60165405273438, "logps_train/policy_2_2": -131.91677856445312, "logps_train/policy_2_w": -220.8421173095703, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.166395902633667, "rewards_train/1-l": -1.3777852058410645, "rewards_train/1-w": 2.3533105850219727, "rewards_train/2-2": 2.1805880069732666, "rewards_train/2-w": -2.074445962905884, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.731095790863037, "rewards_train/margins_1": 3.5197064876556396, "rewards_train/margins_2": 4.25503396987915, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -163.262451171875, "logps_train/policy_1_l": -186.79373168945312, "logps_train/policy_1_w": -141.67166137695312, "logps_train/policy_2_2": -118.82676696777344, "logps_train/policy_2_w": -182.91275024414062, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.1055413484573364, "rewards_train/1-l": -2.0248799324035645, "rewards_train/1-w": 2.189279556274414, "rewards_train/2-2": 1.6789441108703613, "rewards_train/2-w": -0.36783623695373535, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.2141594886779785, "rewards_train/margins_1": 3.2948209047317505, "rewards_train/margins_2": 2.0467803478240967, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -101.69656372070312, "logps_train/policy_1_l": -95.21148681640625, "logps_train/policy_1_w": -90.57257080078125, "logps_train/policy_2_2": -66.28103637695312, "logps_train/policy_2_w": -132.27151489257812, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -79.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -0.7007109522819519, "rewards_train/1-l": -0.9165460467338562, "rewards_train/1-w": 1.331805944442749, "rewards_train/2-2": 1.2965056896209717, "rewards_train/2-w": -0.6314490437507629, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.2483519911766052, "rewards_train/margins_1": 2.032516896724701, "rewards_train/margins_2": 1.9279547333717346, "step": 169 }, { "epoch": 0.51, "logps_train/policy_1_2": -223.38674926757812, "logps_train/policy_1_l": -198.08827209472656, "logps_train/policy_1_w": -154.8162384033203, "logps_train/policy_2_2": -159.32357788085938, "logps_train/policy_2_w": -218.7584991455078, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.1792995929718018, "rewards_train/1-l": -2.139490842819214, "rewards_train/1-w": 2.888688087463379, "rewards_train/2-2": 2.6082682609558105, "rewards_train/2-w": -0.9102247953414917, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.028178930282593, "rewards_train/margins_1": 4.067987680435181, "rewards_train/margins_2": 3.5184930562973022, "step": 169 }, { "epoch": 0.51, "learning_rate": 4.456776613958683e-06, "loss": 1.077, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -174.85621643066406, "logps_train/policy_1_l": -177.85336303710938, "logps_train/policy_1_w": -149.39559936523438, "logps_train/policy_2_2": -130.21299743652344, "logps_train/policy_2_w": -208.06936645507812, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.3059333860874176, "rewards_train/1-l": -2.0146079063415527, "rewards_train/1-w": 2.0614173412323, "rewards_train/2-2": 2.4976463317871094, "rewards_train/2-w": -1.4510767459869385, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.0760252475738525, "rewards_train/margins_1": 2.3673507273197174, "rewards_train/margins_2": 3.948723077774048, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -189.72177124023438, "logps_train/policy_1_l": -171.41725158691406, "logps_train/policy_1_w": -147.97601318359375, "logps_train/policy_2_2": -126.69246673583984, "logps_train/policy_2_w": -226.13876342773438, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.515537142753601, "rewards_train/1-l": -1.9540297985076904, "rewards_train/1-w": 2.4744696617126465, "rewards_train/2-2": 2.456925392150879, "rewards_train/2-w": -2.4537200927734375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.428499460220337, "rewards_train/margins_1": 3.9900068044662476, "rewards_train/margins_2": 4.910645484924316, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -116.33935546875, "logps_train/policy_1_l": -98.89018249511719, "logps_train/policy_1_w": -82.71560668945312, "logps_train/policy_2_2": -85.44923400878906, "logps_train/policy_2_w": -106.82060241699219, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -108.5, "rewards_train/1-2": -0.36284202337265015, "rewards_train/1-l": -0.9586467742919922, "rewards_train/1-w": 1.5040255784988403, "rewards_train/2-2": 1.420262098312378, "rewards_train/2-w": 0.1712602972984314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 2.4626723527908325, "rewards_train/margins_1": 1.8668676018714905, "rewards_train/margins_2": 1.2490018010139465, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -167.17074584960938, "logps_train/policy_1_l": -116.36929321289062, "logps_train/policy_1_w": -165.60037231445312, "logps_train/policy_2_2": -114.05864715576172, "logps_train/policy_2_w": -235.56690979003906, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -0.8143404722213745, "rewards_train/1-l": -1.2665191888809204, "rewards_train/1-w": 2.7169156074523926, "rewards_train/2-2": 1.8886668682098389, "rewards_train/2-w": -0.96919184923172, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.983434796333313, "rewards_train/margins_1": 3.531256079673767, "rewards_train/margins_2": 2.857858717441559, "step": 170 }, { "epoch": 0.51, "logps_train/policy_1_2": -149.5611572265625, "logps_train/policy_1_l": -160.11825561523438, "logps_train/policy_1_w": -93.517578125, "logps_train/policy_2_2": -110.18357849121094, "logps_train/policy_2_w": -144.81539916992188, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -0.5862919092178345, "rewards_train/1-l": -1.438192367553711, "rewards_train/1-w": 2.0962886810302734, "rewards_train/2-2": 1.5914080142974854, "rewards_train/2-w": -0.9616172313690186, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5344810485839844, "rewards_train/margins_1": 2.682580590248108, "rewards_train/margins_2": 2.553025245666504, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -160.52444458007812, "logps_train/policy_1_l": -154.76271057128906, "logps_train/policy_1_w": -132.59536743164062, "logps_train/policy_2_2": -105.73109436035156, "logps_train/policy_2_w": -196.0803680419922, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.4444855451583862, "rewards_train/1-l": -1.7992684841156006, "rewards_train/1-w": 2.5808911323547363, "rewards_train/2-2": 1.8958852291107178, "rewards_train/2-w": -0.5633101463317871, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 4.380159616470337, "rewards_train/margins_1": 4.025376677513123, "rewards_train/margins_2": 2.459195375442505, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -243.32418823242188, "logps_train/policy_1_l": -220.98516845703125, "logps_train/policy_1_w": -198.94955444335938, "logps_train/policy_2_2": -165.3105010986328, "logps_train/policy_2_w": -277.8846435546875, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -227.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": -2.010934829711914, "rewards_train/1-l": -2.7038140296936035, "rewards_train/1-w": 2.802090644836426, "rewards_train/2-2": 2.9138708114624023, "rewards_train/2-w": -2.0300137996673584, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.505904674530029, "rewards_train/margins_1": 4.81302547454834, "rewards_train/margins_2": 4.943884611129761, "step": 171 }, { "epoch": 0.51, "logps_train/policy_1_2": -173.73277282714844, "logps_train/policy_1_l": -151.0232696533203, "logps_train/policy_1_w": -131.71084594726562, "logps_train/policy_2_2": -124.639404296875, "logps_train/policy_2_w": -192.69281005859375, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.5424182415008545, "rewards_train/1-l": -2.0435380935668945, "rewards_train/1-w": 1.9511797428131104, "rewards_train/2-2": 1.6499272584915161, "rewards_train/2-w": -1.383734941482544, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.994717836380005, "rewards_train/margins_1": 3.493597984313965, "rewards_train/margins_2": 3.03366219997406, "step": 171 }, { "epoch": 0.51, "learning_rate": 4.441309656795106e-06, "loss": 0.9721, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -140.27462768554688, "logps_train/policy_1_l": -143.42471313476562, "logps_train/policy_1_w": -117.4394760131836, "logps_train/policy_2_2": -107.02005004882812, "logps_train/policy_2_w": -173.9272918701172, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.5348841547966003, "rewards_train/1-l": -1.8084125518798828, "rewards_train/1-w": 1.6208956241607666, "rewards_train/2-2": 1.5645965337753296, "rewards_train/2-w": -1.4161677360534668, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4293081760406494, "rewards_train/margins_1": 2.155779778957367, "rewards_train/margins_2": 2.9807642698287964, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -211.29544067382812, "logps_train/policy_1_l": -175.12030029296875, "logps_train/policy_1_w": -115.20384216308594, "logps_train/policy_2_2": -143.10189819335938, "logps_train/policy_2_w": -174.02166748046875, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.6275900602340698, "rewards_train/1-l": -2.073357582092285, "rewards_train/1-w": 1.7649424076080322, "rewards_train/2-2": 2.3616862297058105, "rewards_train/2-w": -1.6832215785980225, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8382999897003174, "rewards_train/margins_1": 3.392532467842102, "rewards_train/margins_2": 4.044907808303833, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -193.2279510498047, "logps_train/policy_1_l": -132.078125, "logps_train/policy_1_w": -109.19706726074219, "logps_train/policy_2_2": -132.79049682617188, "logps_train/policy_2_w": -155.98757934570312, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.5316336154937744, "rewards_train/1-l": -1.6827641725540161, "rewards_train/1-w": 1.5033408403396606, "rewards_train/2-2": 2.2456068992614746, "rewards_train/2-w": -1.2819609642028809, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1861050128936768, "rewards_train/margins_1": 3.034974455833435, "rewards_train/margins_2": 3.5275678634643555, "step": 172 }, { "epoch": 0.51, "logps_train/policy_1_2": -186.36148071289062, "logps_train/policy_1_l": -163.54115295410156, "logps_train/policy_1_w": -95.83629608154297, "logps_train/policy_2_2": -125.2870864868164, "logps_train/policy_2_w": -152.19387817382812, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -1.4033364057540894, "rewards_train/1-l": -2.2542130947113037, "rewards_train/1-w": 1.7109018564224243, "rewards_train/2-2": 2.121291399002075, "rewards_train/2-w": -1.477590799331665, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.965114951133728, "rewards_train/margins_1": 3.1142382621765137, "rewards_train/margins_2": 3.5988821983337402, "step": 172 }, { "epoch": 0.52, "logps_train/policy_1_2": -227.01934814453125, "logps_train/policy_1_l": -214.87319946289062, "logps_train/policy_1_w": -150.09796142578125, "logps_train/policy_2_2": -160.95596313476562, "logps_train/policy_2_w": -201.64947509765625, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.234551191329956, "rewards_train/1-l": -1.9603663682937622, "rewards_train/1-w": 2.0773138999938965, "rewards_train/2-2": 2.419638156890869, "rewards_train/2-w": -0.7118217349052429, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.037680268287659, "rewards_train/margins_1": 3.3118650913238525, "rewards_train/margins_2": 3.131459891796112, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -162.968505859375, "logps_train/policy_1_l": -197.36692810058594, "logps_train/policy_1_w": -167.91839599609375, "logps_train/policy_2_2": -116.34983825683594, "logps_train/policy_2_w": -250.5206756591797, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -201.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -0.5577898025512695, "rewards_train/1-l": -2.100072145462036, "rewards_train/1-w": 3.3353075981140137, "rewards_train/2-2": 1.9134539365768433, "rewards_train/2-w": -2.0153002738952637, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.43537974357605, "rewards_train/margins_1": 3.893097400665283, "rewards_train/margins_2": 3.928754210472107, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -153.32452392578125, "logps_train/policy_1_l": -129.7076416015625, "logps_train/policy_1_w": -92.95037078857422, "logps_train/policy_2_2": -108.33724975585938, "logps_train/policy_2_w": -127.60871887207031, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": -0.756281852722168, "rewards_train/1-l": -1.7118284702301025, "rewards_train/1-w": 1.5945134162902832, "rewards_train/2-2": 1.7617828845977783, "rewards_train/2-w": -0.3068682849407196, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3063418865203857, "rewards_train/margins_1": 2.350795269012451, "rewards_train/margins_2": 2.068651169538498, "step": 173 }, { "epoch": 0.52, "logps_train/policy_1_2": -214.5233154296875, "logps_train/policy_1_l": -191.51345825195312, "logps_train/policy_1_w": -171.81033325195312, "logps_train/policy_2_2": -160.67898559570312, "logps_train/policy_2_w": -224.6780548095703, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.22029981017112732, "rewards_train/1-l": -2.162869453430176, "rewards_train/1-w": 1.8365445137023926, "rewards_train/2-2": 2.759251356124878, "rewards_train/2-w": -1.3121408224105835, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9994139671325684, "rewards_train/margins_1": 2.05684432387352, "rewards_train/margins_2": 4.071392178535461, "step": 173 }, { "epoch": 0.52, "learning_rate": 4.425653231231344e-06, "loss": 1.0484, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -184.42527770996094, "logps_train/policy_1_l": -143.25506591796875, "logps_train/policy_1_w": -104.03730773925781, "logps_train/policy_2_2": -133.48924255371094, "logps_train/policy_2_w": -163.96307373046875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -0.6265121698379517, "rewards_train/1-l": -1.8594902753829956, "rewards_train/1-w": 2.0465619564056396, "rewards_train/2-2": 2.433497905731201, "rewards_train/2-w": -0.9390814304351807, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9060522317886353, "rewards_train/margins_1": 2.6730741262435913, "rewards_train/margins_2": 3.372579336166382, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -152.59738159179688, "logps_train/policy_1_l": -151.06808471679688, "logps_train/policy_1_w": -112.15274047851562, "logps_train/policy_2_2": -104.9736328125, "logps_train/policy_2_w": -156.21237182617188, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -1.2196990251541138, "rewards_train/1-l": -1.858272910118103, "rewards_train/1-w": 1.8874597549438477, "rewards_train/2-2": 1.8752931356430054, "rewards_train/2-w": -0.5102032423019409, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.7457326650619507, "rewards_train/margins_1": 3.1071587800979614, "rewards_train/margins_2": 2.3854963779449463, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -165.88662719726562, "logps_train/policy_1_l": -121.15614318847656, "logps_train/policy_1_w": -123.42185974121094, "logps_train/policy_2_2": -115.03764343261719, "logps_train/policy_2_w": -183.78598022460938, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.9320228099822998, "rewards_train/1-l": -1.6126846075057983, "rewards_train/1-w": 2.2117199897766113, "rewards_train/2-2": 1.9528764486312866, "rewards_train/2-w": -1.3352370262145996, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8244045972824097, "rewards_train/margins_1": 3.143742799758911, "rewards_train/margins_2": 3.2881134748458862, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -201.02279663085938, "logps_train/policy_1_l": -199.42608642578125, "logps_train/policy_1_w": -167.6297607421875, "logps_train/policy_2_2": -141.53684997558594, "logps_train/policy_2_w": -244.95538330078125, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": -1.5241546630859375, "rewards_train/1-l": -1.8693175315856934, "rewards_train/1-w": 2.6071410179138184, "rewards_train/2-2": 2.181861400604248, "rewards_train/2-w": -1.7248345613479614, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.476458549499512, "rewards_train/margins_1": 4.131295680999756, "rewards_train/margins_2": 3.9066959619522095, "step": 174 }, { "epoch": 0.52, "logps_train/policy_1_2": -177.056396484375, "logps_train/policy_1_l": -178.4933624267578, "logps_train/policy_1_w": -114.19542694091797, "logps_train/policy_2_2": -117.0066909790039, "logps_train/policy_2_w": -176.1471710205078, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.1902108192443848, "rewards_train/1-l": -1.6899617910385132, "rewards_train/1-w": 2.9312081336975098, "rewards_train/2-2": 2.599135637283325, "rewards_train/2-w": -0.7494823336601257, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.621169924736023, "rewards_train/margins_1": 4.1214189529418945, "rewards_train/margins_2": 3.348617970943451, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -174.1845703125, "logps_train/policy_1_l": -196.18443298339844, "logps_train/policy_1_w": -124.576904296875, "logps_train/policy_2_2": -126.73982238769531, "logps_train/policy_2_w": -186.01991271972656, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.036718726158142, "rewards_train/1-l": -2.0452992916107178, "rewards_train/1-w": 1.735864520072937, "rewards_train/2-2": 1.6654468774795532, "rewards_train/2-w": -2.041248321533203, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.781163811683655, "rewards_train/margins_1": 2.772583246231079, "rewards_train/margins_2": 3.7066951990127563, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -142.10995483398438, "logps_train/policy_1_l": -119.27041625976562, "logps_train/policy_1_w": -120.78608703613281, "logps_train/policy_2_2": -95.29222106933594, "logps_train/policy_2_w": -188.45425415039062, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.8457615375518799, "rewards_train/1-l": -1.8912993669509888, "rewards_train/1-w": 2.1350631713867188, "rewards_train/2-2": 1.9240986108779907, "rewards_train/2-w": -1.9063634872436523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.0263625383377075, "rewards_train/margins_1": 2.9808247089385986, "rewards_train/margins_2": 3.830462098121643, "step": 175 }, { "epoch": 0.52, "logps_train/policy_1_2": -172.6146240234375, "logps_train/policy_1_l": -222.48486328125, "logps_train/policy_1_w": -173.72174072265625, "logps_train/policy_2_2": -130.04354858398438, "logps_train/policy_2_w": -229.84767150878906, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -0.18060430884361267, "rewards_train/1-l": -2.84926700592041, "rewards_train/1-w": 2.2465744018554688, "rewards_train/2-2": 2.354433059692383, "rewards_train/2-w": -1.3648451566696167, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.095841407775879, "rewards_train/margins_1": 2.4271787106990814, "rewards_train/margins_2": 3.7192782163619995, "step": 175 }, { "epoch": 0.53, "learning_rate": 4.409808865306932e-06, "loss": 0.9447, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -238.0126190185547, "logps_train/policy_1_l": -173.56405639648438, "logps_train/policy_1_w": -141.8250732421875, "logps_train/policy_2_2": -170.1781005859375, "logps_train/policy_2_w": -198.86935424804688, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.032902717590332, "rewards_train/1-l": -1.5341391563415527, "rewards_train/1-w": 2.1780385971069336, "rewards_train/2-2": 2.986095428466797, "rewards_train/2-w": -1.2119373083114624, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7121777534484863, "rewards_train/margins_1": 3.2109413146972656, "rewards_train/margins_2": 4.198032736778259, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -206.41497802734375, "logps_train/policy_1_l": -234.33624267578125, "logps_train/policy_1_w": -151.9423370361328, "logps_train/policy_2_2": -138.87132263183594, "logps_train/policy_2_w": -228.47549438476562, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.892670750617981, "rewards_train/1-l": -3.080256223678589, "rewards_train/1-w": 2.653667449951172, "rewards_train/2-2": 2.2843527793884277, "rewards_train/2-w": -2.323526620864868, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.733923673629761, "rewards_train/margins_1": 4.546338200569153, "rewards_train/margins_2": 4.607879400253296, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -198.52957153320312, "logps_train/policy_1_l": -174.56494140625, "logps_train/policy_1_w": -120.95738983154297, "logps_train/policy_2_2": -133.12826538085938, "logps_train/policy_2_w": -175.54385375976562, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -1.622489094734192, "rewards_train/1-l": -2.1704583168029785, "rewards_train/1-w": 2.0105602741241455, "rewards_train/2-2": 2.33111834526062, "rewards_train/2-w": -1.195499300956726, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.181018590927124, "rewards_train/margins_1": 3.6330493688583374, "rewards_train/margins_2": 3.526617646217346, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -195.56292724609375, "logps_train/policy_1_l": -178.49908447265625, "logps_train/policy_1_w": -152.4234619140625, "logps_train/policy_2_2": -124.0376205444336, "logps_train/policy_2_w": -226.47109985351562, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -1.7328555583953857, "rewards_train/1-l": -1.9078192710876465, "rewards_train/1-w": 2.4474987983703613, "rewards_train/2-2": 2.208737850189209, "rewards_train/2-w": -1.9517977237701416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.355318069458008, "rewards_train/margins_1": 4.180354356765747, "rewards_train/margins_2": 4.160535573959351, "step": 176 }, { "epoch": 0.53, "logps_train/policy_1_2": -181.28683471679688, "logps_train/policy_1_l": -174.07699584960938, "logps_train/policy_1_w": -129.5164794921875, "logps_train/policy_2_2": -133.17953491210938, "logps_train/policy_2_w": -189.0619354248047, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.5716516971588135, "rewards_train/1-l": -1.4037933349609375, "rewards_train/1-w": 2.4749159812927246, "rewards_train/2-2": 2.111733913421631, "rewards_train/2-w": -0.5806076526641846, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.878709316253662, "rewards_train/margins_1": 3.046567678451538, "rewards_train/margins_2": 2.6923415660858154, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -157.3343963623047, "logps_train/policy_1_l": -175.8809356689453, "logps_train/policy_1_w": -117.71598815917969, "logps_train/policy_2_2": -117.24177551269531, "logps_train/policy_2_w": -156.7652587890625, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.923087477684021, "rewards_train/1-l": -1.5740315914154053, "rewards_train/1-w": 1.9077966213226318, "rewards_train/2-2": 1.4947673082351685, "rewards_train/2-w": -0.48453283309936523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.481828212738037, "rewards_train/margins_1": 2.830884099006653, "rewards_train/margins_2": 1.9793001413345337, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -173.4808349609375, "logps_train/policy_1_l": -140.16087341308594, "logps_train/policy_1_w": -101.4140625, "logps_train/policy_2_2": -115.86146545410156, "logps_train/policy_2_w": -157.29544067382812, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -0.9965208172798157, "rewards_train/1-l": -1.0619248151779175, "rewards_train/1-w": 2.2996344566345215, "rewards_train/2-2": 2.351597785949707, "rewards_train/2-w": -0.6394060254096985, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.361559271812439, "rewards_train/margins_1": 3.296155273914337, "rewards_train/margins_2": 2.9910038113594055, "step": 177 }, { "epoch": 0.53, "logps_train/policy_1_2": -177.01968383789062, "logps_train/policy_1_l": -142.0914306640625, "logps_train/policy_1_w": -128.34527587890625, "logps_train/policy_2_2": -129.06369018554688, "logps_train/policy_2_w": -189.834716796875, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.2691571712493896, "rewards_train/1-l": -1.2579703330993652, "rewards_train/1-w": 2.1420342922210693, "rewards_train/2-2": 1.4897257089614868, "rewards_train/2-w": -1.3563239574432373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.4000046253204346, "rewards_train/margins_1": 3.411191463470459, "rewards_train/margins_2": 2.846049666404724, "step": 177 }, { "epoch": 0.53, "learning_rate": 4.393778105404051e-06, "loss": 0.9119, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -172.1231689453125, "logps_train/policy_1_l": -160.58030700683594, "logps_train/policy_1_w": -121.83361053466797, "logps_train/policy_2_2": -123.08553314208984, "logps_train/policy_2_w": -170.27471923828125, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.756848931312561, "rewards_train/1-l": -1.0624746084213257, "rewards_train/1-w": 1.8167847394943237, "rewards_train/2-2": 2.0709877014160156, "rewards_train/2-w": -0.8627026081085205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.8792593479156494, "rewards_train/margins_1": 2.5736336708068848, "rewards_train/margins_2": 2.933690309524536, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -179.1568603515625, "logps_train/policy_1_l": -192.29556274414062, "logps_train/policy_1_w": -159.2602996826172, "logps_train/policy_2_2": -128.11642456054688, "logps_train/policy_2_w": -248.42556762695312, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -0.7359992861747742, "rewards_train/1-l": -1.818617582321167, "rewards_train/1-w": 2.990767478942871, "rewards_train/2-2": 2.057889461517334, "rewards_train/2-w": -1.8988062143325806, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.809385061264038, "rewards_train/margins_1": 3.7267667651176453, "rewards_train/margins_2": 3.9566956758499146, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -262.345703125, "logps_train/policy_1_l": -217.05303955078125, "logps_train/policy_1_w": -127.22505950927734, "logps_train/policy_2_2": -174.78070068359375, "logps_train/policy_2_w": -204.28802490234375, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -2.4207513332366943, "rewards_train/1-l": -2.0440235137939453, "rewards_train/1-w": 2.860891819000244, "rewards_train/2-2": 2.7821834087371826, "rewards_train/2-w": -1.5588808059692383, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.9049153327941895, "rewards_train/margins_1": 5.2816431522369385, "rewards_train/margins_2": 4.341064214706421, "step": 178 }, { "epoch": 0.53, "logps_train/policy_1_2": -140.56724548339844, "logps_train/policy_1_l": -117.56121826171875, "logps_train/policy_1_w": -76.59255981445312, "logps_train/policy_2_2": -98.54898071289062, "logps_train/policy_2_w": -114.1669921875, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": 0.005872607231140137, "rewards_train/1-l": -1.4186205863952637, "rewards_train/1-w": 1.5151584148406982, "rewards_train/2-2": 2.314047336578369, "rewards_train/2-w": -0.5098630785942078, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.933779001235962, "rewards_train/margins_1": 1.509285807609558, "rewards_train/margins_2": 2.823910415172577, "step": 178 }, { "epoch": 0.54, "logps_train/policy_1_2": -149.0926513671875, "logps_train/policy_1_l": -156.85635375976562, "logps_train/policy_1_w": -132.18838500976562, "logps_train/policy_2_2": -108.10063934326172, "logps_train/policy_2_w": -178.95230102539062, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.5522329211235046, "rewards_train/1-l": -1.522842526435852, "rewards_train/1-w": 1.8100674152374268, "rewards_train/2-2": 1.8454053401947021, "rewards_train/2-w": -0.6444500684738159, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.332909941673279, "rewards_train/margins_1": 2.3623003363609314, "rewards_train/margins_2": 2.489855408668518, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -154.53118896484375, "logps_train/policy_1_l": -137.18429565429688, "logps_train/policy_1_w": -121.93208312988281, "logps_train/policy_2_2": -117.89163970947266, "logps_train/policy_2_w": -172.60565185546875, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.22469988465309143, "rewards_train/1-l": -1.420089602470398, "rewards_train/1-w": 2.1312057971954346, "rewards_train/2-2": 1.9259239435195923, "rewards_train/2-w": -0.8109550476074219, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.5512953996658325, "rewards_train/margins_1": 2.355905681848526, "rewards_train/margins_2": 2.736878991127014, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -188.4093017578125, "logps_train/policy_1_l": -214.83370971679688, "logps_train/policy_1_w": -145.28067016601562, "logps_train/policy_2_2": -127.45063781738281, "logps_train/policy_2_w": -212.4959716796875, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.1448360681533813, "rewards_train/1-l": -2.3341526985168457, "rewards_train/1-w": 2.7036709785461426, "rewards_train/2-2": 2.521440267562866, "rewards_train/2-w": -1.0429561138153076, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.037823677062988, "rewards_train/margins_1": 3.848507046699524, "rewards_train/margins_2": 3.564396381378174, "step": 179 }, { "epoch": 0.54, "logps_train/policy_1_2": -188.0557861328125, "logps_train/policy_1_l": -168.44906616210938, "logps_train/policy_1_w": -110.37958526611328, "logps_train/policy_2_2": -135.3687286376953, "logps_train/policy_2_w": -161.91189575195312, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.2688602209091187, "rewards_train/1-l": -1.3150229454040527, "rewards_train/1-w": 1.3464165925979614, "rewards_train/2-2": 1.8541418313980103, "rewards_train/2-w": -1.5056421756744385, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.661439538002014, "rewards_train/margins_1": 2.61527681350708, "rewards_train/margins_2": 3.3597840070724487, "step": 179 }, { "epoch": 0.54, "learning_rate": 4.377562516096608e-06, "loss": 0.9756, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -184.88912963867188, "logps_train/policy_1_l": -149.05764770507812, "logps_train/policy_1_w": -108.90388488769531, "logps_train/policy_2_2": -126.55671691894531, "logps_train/policy_2_w": -158.46286010742188, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -1.0982890129089355, "rewards_train/1-l": -1.658156394958496, "rewards_train/1-w": 2.107072591781616, "rewards_train/2-2": 2.329483985900879, "rewards_train/2-w": -0.8501937389373779, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7652289867401123, "rewards_train/margins_1": 3.2053616046905518, "rewards_train/margins_2": 3.179677724838257, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -183.58251953125, "logps_train/policy_1_l": -138.83367919921875, "logps_train/policy_1_w": -120.47630310058594, "logps_train/policy_2_2": -131.30450439453125, "logps_train/policy_2_w": -170.3918914794922, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.2924309968948364, "rewards_train/1-l": -0.9863957166671753, "rewards_train/1-w": 1.7261978387832642, "rewards_train/2-2": 1.7470884323120117, "rewards_train/2-w": -1.3044226169586182, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.7125935554504395, "rewards_train/margins_1": 3.0186288356781006, "rewards_train/margins_2": 3.05151104927063, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -196.20956420898438, "logps_train/policy_1_l": -127.21123504638672, "logps_train/policy_1_w": -130.2869110107422, "logps_train/policy_2_2": -127.1905517578125, "logps_train/policy_2_w": -209.03436279296875, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -2.2861900329589844, "rewards_train/1-l": -0.9944270849227905, "rewards_train/1-w": 2.4426960945129395, "rewards_train/2-2": 2.111804962158203, "rewards_train/2-w": -2.3858585357666016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.43712317943573, "rewards_train/margins_1": 4.728886127471924, "rewards_train/margins_2": 4.497663497924805, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -131.60020446777344, "logps_train/policy_1_l": -103.0683364868164, "logps_train/policy_1_w": -93.25563049316406, "logps_train/policy_2_2": -96.86137390136719, "logps_train/policy_2_w": -125.96920013427734, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -0.6471294164657593, "rewards_train/1-l": -0.5238020420074463, "rewards_train/1-w": 1.8597891330718994, "rewards_train/2-2": 1.3072211742401123, "rewards_train/2-w": -0.008639141917228699, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 2.3835911750793457, "rewards_train/margins_1": 2.5069185495376587, "rewards_train/margins_2": 1.315860316157341, "step": 180 }, { "epoch": 0.54, "logps_train/policy_1_2": -179.99005126953125, "logps_train/policy_1_l": -165.6209259033203, "logps_train/policy_1_w": -106.92498779296875, "logps_train/policy_2_2": -124.5896224975586, "logps_train/policy_2_w": -152.0529022216797, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.0306450128555298, "rewards_train/1-l": -1.805452585220337, "rewards_train/1-w": 1.8084290027618408, "rewards_train/2-2": 2.3019754886627197, "rewards_train/2-w": -0.9117844104766846, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6138815879821777, "rewards_train/margins_1": 2.8390740156173706, "rewards_train/margins_2": 3.2137598991394043, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -232.99038696289062, "logps_train/policy_1_l": -151.95770263671875, "logps_train/policy_1_w": -145.57620239257812, "logps_train/policy_2_2": -149.53286743164062, "logps_train/policy_2_w": -235.49203491210938, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -2.039663314819336, "rewards_train/1-l": -1.627997636795044, "rewards_train/1-w": 2.2999954223632812, "rewards_train/2-2": 3.2654621601104736, "rewards_train/2-w": -3.0007662773132324, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.927993059158325, "rewards_train/margins_1": 4.339658737182617, "rewards_train/margins_2": 6.266228437423706, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -183.26048278808594, "logps_train/policy_1_l": -158.08641052246094, "logps_train/policy_1_w": -102.79190826416016, "logps_train/policy_2_2": -115.98916625976562, "logps_train/policy_2_w": -156.4669189453125, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.8580800294876099, "rewards_train/1-l": -1.7927168607711792, "rewards_train/1-w": 2.446199417114258, "rewards_train/2-2": 1.9807707071304321, "rewards_train/2-w": -0.8896603584289551, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.238916277885437, "rewards_train/margins_1": 4.304279446601868, "rewards_train/margins_2": 2.870431065559387, "step": 181 }, { "epoch": 0.54, "logps_train/policy_1_2": -206.43841552734375, "logps_train/policy_1_l": -178.37109375, "logps_train/policy_1_w": -142.20616149902344, "logps_train/policy_2_2": -154.34597778320312, "logps_train/policy_2_w": -200.75906372070312, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.9418877363204956, "rewards_train/1-l": -1.7317086458206177, "rewards_train/1-w": 2.2358293533325195, "rewards_train/2-2": 2.1794657707214355, "rewards_train/2-w": -1.3192650079727173, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.967537999153137, "rewards_train/margins_1": 3.177717089653015, "rewards_train/margins_2": 3.498730778694153, "step": 181 }, { "epoch": 0.54, "learning_rate": 4.361163679997532e-06, "loss": 1.0956, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -197.92919921875, "logps_train/policy_1_l": -224.48837280273438, "logps_train/policy_1_w": -140.598388671875, "logps_train/policy_2_2": -142.03182983398438, "logps_train/policy_2_w": -207.430419921875, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.4454588890075684, "rewards_train/1-l": -1.9128997325897217, "rewards_train/1-w": 2.3986573219299316, "rewards_train/2-2": 1.6952557563781738, "rewards_train/2-w": -1.0662848949432373, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.311557054519653, "rewards_train/margins_1": 3.8441162109375, "rewards_train/margins_2": 2.761540651321411, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -152.0169219970703, "logps_train/policy_1_l": -102.11506652832031, "logps_train/policy_1_w": -114.37218475341797, "logps_train/policy_2_2": -110.84838104248047, "logps_train/policy_2_w": -151.81576538085938, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.943098247051239, "rewards_train/1-l": -1.3581130504608154, "rewards_train/1-w": 1.7459852695465088, "rewards_train/2-2": 1.6203374862670898, "rewards_train/2-w": -0.4026695787906647, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.104098320007324, "rewards_train/margins_1": 2.689083516597748, "rewards_train/margins_2": 2.0230070650577545, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -207.24472045898438, "logps_train/policy_1_l": -235.4202423095703, "logps_train/policy_1_w": -143.43943786621094, "logps_train/policy_2_2": -138.07342529296875, "logps_train/policy_2_w": -217.5612335205078, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -207.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.170565128326416, "rewards_train/1-l": -2.8013997077941895, "rewards_train/1-w": 2.797071695327759, "rewards_train/2-2": 3.0020334720611572, "rewards_train/2-w": -1.213545799255371, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.598471403121948, "rewards_train/margins_1": 3.967636823654175, "rewards_train/margins_2": 4.215579271316528, "step": 182 }, { "epoch": 0.54, "logps_train/policy_1_2": -224.51956176757812, "logps_train/policy_1_l": -207.6107940673828, "logps_train/policy_1_w": -122.37906646728516, "logps_train/policy_2_2": -160.41030883789062, "logps_train/policy_2_w": -194.26156616210938, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.2514691352844238, "rewards_train/1-l": -1.4044406414031982, "rewards_train/1-w": 2.701350688934326, "rewards_train/2-2": 2.233870506286621, "rewards_train/2-w": -1.723813533782959, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.105791330337524, "rewards_train/margins_1": 3.95281982421875, "rewards_train/margins_2": 3.95768404006958, "step": 182 }, { "epoch": 0.55, "logps_train/policy_1_2": -168.65255737304688, "logps_train/policy_1_l": -108.64479064941406, "logps_train/policy_1_w": -105.03958129882812, "logps_train/policy_2_2": -110.64682006835938, "logps_train/policy_2_w": -164.93844604492188, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.8108608722686768, "rewards_train/1-l": -0.8496353626251221, "rewards_train/1-w": 1.8460417985916138, "rewards_train/2-2": 1.6745762825012207, "rewards_train/2-w": -1.7151339054107666, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.695677161216736, "rewards_train/margins_1": 3.6569026708602905, "rewards_train/margins_2": 3.3897101879119873, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -188.79446411132812, "logps_train/policy_1_l": -226.08013916015625, "logps_train/policy_1_w": -131.84567260742188, "logps_train/policy_2_2": -129.71298217773438, "logps_train/policy_2_w": -196.27261352539062, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.3462440967559814, "rewards_train/1-l": -2.471491813659668, "rewards_train/1-w": 1.948050856590271, "rewards_train/2-2": 2.119523048400879, "rewards_train/2-w": -1.815542221069336, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.419542670249939, "rewards_train/margins_1": 3.2942949533462524, "rewards_train/margins_2": 3.935065269470215, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -249.26675415039062, "logps_train/policy_1_l": -188.64340209960938, "logps_train/policy_1_w": -143.57986450195312, "logps_train/policy_2_2": -175.74392700195312, "logps_train/policy_2_w": -202.64044189453125, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.9090983867645264, "rewards_train/1-l": -1.8040138483047485, "rewards_train/1-w": 3.0244345664978027, "rewards_train/2-2": 2.6396708488464355, "rewards_train/2-w": -0.5351388454437256, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.828448414802551, "rewards_train/margins_1": 4.933532953262329, "rewards_train/margins_2": 3.174809694290161, "step": 183 }, { "epoch": 0.55, "logps_train/policy_1_2": -152.9252471923828, "logps_train/policy_1_l": -133.16458129882812, "logps_train/policy_1_w": -141.38316345214844, "logps_train/policy_2_2": -113.55653381347656, "logps_train/policy_2_w": -205.32638549804688, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.5768997669219971, "rewards_train/1-l": -1.2969881296157837, "rewards_train/1-w": 2.4466445446014404, "rewards_train/2-2": 1.4990344047546387, "rewards_train/2-w": -1.1763883829116821, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.743632674217224, "rewards_train/margins_1": 3.0235443115234375, "rewards_train/margins_2": 2.675422787666321, "step": 183 }, { "epoch": 0.55, "learning_rate": 4.344583197604319e-06, "loss": 0.9774, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -192.38192749023438, "logps_train/policy_1_l": -188.15789794921875, "logps_train/policy_1_w": -132.07406616210938, "logps_train/policy_2_2": -131.73980712890625, "logps_train/policy_2_w": -185.58460998535156, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.888777732849121, "rewards_train/1-l": -1.956024169921875, "rewards_train/1-w": 2.3175759315490723, "rewards_train/2-2": 2.018596649169922, "rewards_train/2-w": -1.0356093645095825, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.273600101470947, "rewards_train/margins_1": 4.206353664398193, "rewards_train/margins_2": 3.0542060136795044, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -192.7578125, "logps_train/policy_1_l": -135.3411102294922, "logps_train/policy_1_w": -121.79647827148438, "logps_train/policy_2_2": -127.667724609375, "logps_train/policy_2_w": -175.15402221679688, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.7554707527160645, "rewards_train/1-l": -1.3908252716064453, "rewards_train/1-w": 2.404043674468994, "rewards_train/2-2": 2.153735637664795, "rewards_train/2-w": -0.518528938293457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7948689460754395, "rewards_train/margins_1": 4.159514427185059, "rewards_train/margins_2": 2.672264575958252, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -194.86663818359375, "logps_train/policy_1_l": -167.86831665039062, "logps_train/policy_1_w": -122.68539428710938, "logps_train/policy_2_2": -135.81997680664062, "logps_train/policy_2_w": -166.13320922851562, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.0440847873687744, "rewards_train/1-l": -1.5233074426651, "rewards_train/1-w": 1.671694278717041, "rewards_train/2-2": 2.3396825790405273, "rewards_train/2-w": -0.5586335062980652, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.195001721382141, "rewards_train/margins_1": 2.7157790660858154, "rewards_train/margins_2": 2.8983160853385925, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -161.43963623046875, "logps_train/policy_1_l": -115.0152359008789, "logps_train/policy_1_w": -121.33073425292969, "logps_train/policy_2_2": -109.80801391601562, "logps_train/policy_2_w": -179.59765625, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.7068549394607544, "rewards_train/1-l": -1.336679220199585, "rewards_train/1-w": 1.949349045753479, "rewards_train/2-2": 1.5084564685821533, "rewards_train/2-w": -1.7089831829071045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.286028265953064, "rewards_train/margins_1": 3.6562039852142334, "rewards_train/margins_2": 3.217439651489258, "step": 184 }, { "epoch": 0.55, "logps_train/policy_1_2": -216.61668395996094, "logps_train/policy_1_l": -204.54986572265625, "logps_train/policy_1_w": -148.8216552734375, "logps_train/policy_2_2": -162.57228088378906, "logps_train/policy_2_w": -210.4881591796875, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -0.3801254630088806, "rewards_train/1-l": -2.353131055831909, "rewards_train/1-w": 3.2317018508911133, "rewards_train/2-2": 2.8830058574676514, "rewards_train/2-w": -0.4367064833641052, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.5848329067230225, "rewards_train/margins_1": 3.611827313899994, "rewards_train/margins_2": 3.3197123408317566, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -130.00067138671875, "logps_train/policy_1_l": -173.48016357421875, "logps_train/policy_1_w": -144.1427459716797, "logps_train/policy_2_2": -100.60981750488281, "logps_train/policy_2_w": -177.48748779296875, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": 0.1870427280664444, "rewards_train/1-l": -2.153461456298828, "rewards_train/1-w": 0.96795254945755, "rewards_train/2-2": 2.015580654144287, "rewards_train/2-w": -0.937811553478241, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.121414005756378, "rewards_train/margins_1": 0.7809098213911057, "rewards_train/margins_2": 2.953392207622528, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -129.81845092773438, "logps_train/policy_1_l": -149.10401916503906, "logps_train/policy_1_w": -119.3786849975586, "logps_train/policy_2_2": -85.85922241210938, "logps_train/policy_2_w": -189.09439086914062, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -0.8240339159965515, "rewards_train/1-l": -1.3831562995910645, "rewards_train/1-w": 2.3875222206115723, "rewards_train/2-2": 1.531851053237915, "rewards_train/2-w": -1.8078763484954834, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.7706785202026367, "rewards_train/margins_1": 3.211556136608124, "rewards_train/margins_2": 3.3397274017333984, "step": 185 }, { "epoch": 0.55, "logps_train/policy_1_2": -281.56591796875, "logps_train/policy_1_l": -250.34786987304688, "logps_train/policy_1_w": -138.65785217285156, "logps_train/policy_2_2": -198.88760375976562, "logps_train/policy_2_w": -201.26312255859375, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.8175266981124878, "rewards_train/1-l": -1.790840744972229, "rewards_train/1-w": 2.024033784866333, "rewards_train/2-2": 3.3331146240234375, "rewards_train/2-w": -1.1658635139465332, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.814874529838562, "rewards_train/margins_1": 3.841560482978821, "rewards_train/margins_2": 4.498978137969971, "step": 185 }, { "epoch": 0.56, "learning_rate": 4.327822687142818e-06, "loss": 1.4319, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -185.28237915039062, "logps_train/policy_1_l": -118.27897644042969, "logps_train/policy_1_w": -117.72676086425781, "logps_train/policy_2_2": -130.02774047851562, "logps_train/policy_2_w": -159.83572387695312, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -0.7727688550949097, "rewards_train/1-l": -0.821354866027832, "rewards_train/1-w": 2.194902181625366, "rewards_train/2-2": 2.5347256660461426, "rewards_train/2-w": -0.3894304633140564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.0162570476531982, "rewards_train/margins_1": 2.967671036720276, "rewards_train/margins_2": 2.924156129360199, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -122.88081359863281, "logps_train/policy_1_l": -76.16462707519531, "logps_train/policy_1_w": -86.22714233398438, "logps_train/policy_2_2": -87.19654846191406, "logps_train/policy_2_w": -117.1939926147461, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": 0.0811077356338501, "rewards_train/1-l": -0.9206616878509521, "rewards_train/1-w": 1.8666402101516724, "rewards_train/2-2": 2.303978443145752, "rewards_train/2-w": 0.12474125623703003, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.7873018980026245, "rewards_train/margins_1": 1.7855324745178223, "rewards_train/margins_2": 2.179237186908722, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -148.9330291748047, "logps_train/policy_1_l": -100.80096435546875, "logps_train/policy_1_w": -104.57011413574219, "logps_train/policy_2_2": -107.36915588378906, "logps_train/policy_2_w": -150.0179443359375, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -0.3675210773944855, "rewards_train/1-l": -0.7329282760620117, "rewards_train/1-w": 1.785956621170044, "rewards_train/2-2": 1.9830065965652466, "rewards_train/2-w": -0.6846071481704712, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.5188848972320557, "rewards_train/margins_1": 2.1534776985645294, "rewards_train/margins_2": 2.6676137447357178, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -165.3057403564453, "logps_train/policy_1_l": -124.6360855102539, "logps_train/policy_1_w": -106.383544921875, "logps_train/policy_2_2": -110.34378814697266, "logps_train/policy_2_w": -160.1063690185547, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.0669020414352417, "rewards_train/1-l": -0.7150489091873169, "rewards_train/1-w": 1.9224613904953003, "rewards_train/2-2": 2.322652816772461, "rewards_train/2-w": -1.1591717004776, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.637510299682617, "rewards_train/margins_1": 2.989363431930542, "rewards_train/margins_2": 3.481824517250061, "step": 186 }, { "epoch": 0.56, "logps_train/policy_1_2": -121.61410522460938, "logps_train/policy_1_l": -145.54769897460938, "logps_train/policy_1_w": -139.58749389648438, "logps_train/policy_2_2": -88.60568237304688, "logps_train/policy_2_w": -192.1602783203125, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -0.5179600119590759, "rewards_train/1-l": -1.4656155109405518, "rewards_train/1-w": 2.358438014984131, "rewards_train/2-2": 1.1699793338775635, "rewards_train/2-w": -0.9769662618637085, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8240535259246826, "rewards_train/margins_1": 2.876398026943207, "rewards_train/margins_2": 2.146945595741272, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -240.3101806640625, "logps_train/policy_1_l": -171.55859375, "logps_train/policy_1_w": -126.01687622070312, "logps_train/policy_2_2": -158.07933044433594, "logps_train/policy_2_w": -185.30215454101562, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -2.3610949516296387, "rewards_train/1-l": -1.2194838523864746, "rewards_train/1-w": 2.065890312194824, "rewards_train/2-2": 2.783473491668701, "rewards_train/2-w": -1.065761685371399, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.285374164581299, "rewards_train/margins_1": 4.426985263824463, "rewards_train/margins_2": 3.8492351770401, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -152.25135803222656, "logps_train/policy_1_l": -124.9340591430664, "logps_train/policy_1_w": -80.79383850097656, "logps_train/policy_2_2": -101.49615478515625, "logps_train/policy_2_w": -133.00863647460938, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -0.6934946179389954, "rewards_train/1-l": -1.4766335487365723, "rewards_train/1-w": 2.121983766555786, "rewards_train/2-2": 2.2078065872192383, "rewards_train/2-w": -0.7895366549491882, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.5986173152923584, "rewards_train/margins_1": 2.8154783844947815, "rewards_train/margins_2": 2.9973432421684265, "step": 187 }, { "epoch": 0.56, "logps_train/policy_1_2": -152.2606964111328, "logps_train/policy_1_l": -114.6102294921875, "logps_train/policy_1_w": -110.96043395996094, "logps_train/policy_2_2": -102.18350219726562, "logps_train/policy_2_w": -153.77288818359375, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -0.8280231952667236, "rewards_train/1-l": -0.5298210382461548, "rewards_train/1-w": 1.7559103965759277, "rewards_train/2-2": 2.026571273803711, "rewards_train/2-w": -0.48353859782218933, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.2857314348220825, "rewards_train/margins_1": 2.5839335918426514, "rewards_train/margins_2": 2.5101098716259003, "step": 187 }, { "epoch": 0.56, "learning_rate": 4.310883784409307e-06, "loss": 1.1248, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -202.06016540527344, "logps_train/policy_1_l": -197.9180908203125, "logps_train/policy_1_w": -145.04808044433594, "logps_train/policy_2_2": -144.19329833984375, "logps_train/policy_2_w": -212.9446563720703, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -0.45328307151794434, "rewards_train/1-l": -1.7007744312286377, "rewards_train/1-w": 2.6959733963012695, "rewards_train/2-2": 2.5591869354248047, "rewards_train/2-w": -1.2608721256256104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.396747827529907, "rewards_train/margins_1": 3.149256467819214, "rewards_train/margins_2": 3.820059061050415, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -226.6660614013672, "logps_train/policy_1_l": -136.04278564453125, "logps_train/policy_1_w": -119.90377807617188, "logps_train/policy_2_2": -168.963134765625, "logps_train/policy_2_w": -163.94598388671875, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.5114294290542603, "rewards_train/1-l": -1.0288997888565063, "rewards_train/1-w": 1.9998447895050049, "rewards_train/2-2": 2.898803472518921, "rewards_train/2-w": -0.4932296872138977, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0287445783615112, "rewards_train/margins_1": 2.511274218559265, "rewards_train/margins_2": 3.3920331597328186, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -192.78863525390625, "logps_train/policy_1_l": -156.97735595703125, "logps_train/policy_1_w": -141.91366577148438, "logps_train/policy_2_2": -136.2188720703125, "logps_train/policy_2_w": -213.56503295898438, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.2116903066635132, "rewards_train/1-l": -1.742624044418335, "rewards_train/1-w": 2.9898838996887207, "rewards_train/2-2": 2.0565242767333984, "rewards_train/2-w": -1.3981045484542847, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.732507944107056, "rewards_train/margins_1": 4.201574206352234, "rewards_train/margins_2": 3.454628825187683, "step": 188 }, { "epoch": 0.56, "logps_train/policy_1_2": -230.65093994140625, "logps_train/policy_1_l": -194.02618408203125, "logps_train/policy_1_w": -134.4357452392578, "logps_train/policy_2_2": -165.32281494140625, "logps_train/policy_2_w": -192.6763916015625, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.491706132888794, "rewards_train/1-l": -1.491974115371704, "rewards_train/1-w": 2.434159755706787, "rewards_train/2-2": 2.6684999465942383, "rewards_train/2-w": -0.9078723192214966, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.926133871078491, "rewards_train/margins_1": 3.925865888595581, "rewards_train/margins_2": 3.576372265815735, "step": 188 }, { "epoch": 0.57, "logps_train/policy_1_2": -208.45407104492188, "logps_train/policy_1_l": -170.4265594482422, "logps_train/policy_1_w": -111.8375244140625, "logps_train/policy_2_2": -147.140380859375, "logps_train/policy_2_w": -165.36923217773438, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.8895474076271057, "rewards_train/1-l": -1.4576945304870605, "rewards_train/1-w": 1.7974973917007446, "rewards_train/2-2": 2.411743402481079, "rewards_train/2-w": -1.0974688529968262, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.255191922187805, "rewards_train/margins_1": 2.6870447993278503, "rewards_train/margins_2": 3.5092122554779053, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -190.3585968017578, "logps_train/policy_1_l": -176.06854248046875, "logps_train/policy_1_w": -124.15679931640625, "logps_train/policy_2_2": -135.72296142578125, "logps_train/policy_2_w": -177.42227172851562, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.0217971801757812, "rewards_train/1-l": -1.1865413188934326, "rewards_train/1-w": 1.6685973405838013, "rewards_train/2-2": 1.905829668045044, "rewards_train/2-w": -0.9933992624282837, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.855138659477234, "rewards_train/margins_1": 2.6903945207595825, "rewards_train/margins_2": 2.8992289304733276, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -211.65972900390625, "logps_train/policy_1_l": -190.931884765625, "logps_train/policy_1_w": -123.68234252929688, "logps_train/policy_2_2": -129.0509033203125, "logps_train/policy_2_w": -187.43890380859375, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -2.2546443939208984, "rewards_train/1-l": -1.3745254278182983, "rewards_train/1-w": 2.2708282470703125, "rewards_train/2-2": 2.690613269805908, "rewards_train/2-w": -1.03373384475708, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.645353674888611, "rewards_train/margins_1": 4.525472640991211, "rewards_train/margins_2": 3.7243471145629883, "step": 189 }, { "epoch": 0.57, "logps_train/policy_1_2": -161.82186889648438, "logps_train/policy_1_l": -145.49282836914062, "logps_train/policy_1_w": -105.71289825439453, "logps_train/policy_2_2": -103.60289001464844, "logps_train/policy_2_w": -167.0128936767578, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.2954680919647217, "rewards_train/1-l": -1.3535785675048828, "rewards_train/1-w": 2.1630845069885254, "rewards_train/2-2": 1.629555344581604, "rewards_train/2-w": -1.1079299449920654, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.516663074493408, "rewards_train/margins_1": 3.458552598953247, "rewards_train/margins_2": 2.7374852895736694, "step": 189 }, { "epoch": 0.57, "learning_rate": 4.293768142610828e-06, "loss": 1.0237, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -174.99276733398438, "logps_train/policy_1_l": -159.68833923339844, "logps_train/policy_1_w": -116.73566436767578, "logps_train/policy_2_2": -131.70062255859375, "logps_train/policy_2_w": -167.16970825195312, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.49732404947280884, "rewards_train/1-l": -1.3727879524230957, "rewards_train/1-w": 1.8772149085998535, "rewards_train/2-2": 2.062751054763794, "rewards_train/2-w": -0.9310342073440552, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.250002861022949, "rewards_train/margins_1": 2.3745389580726624, "rewards_train/margins_2": 2.993785262107849, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -240.6796875, "logps_train/policy_1_l": -250.1857147216797, "logps_train/policy_1_w": -183.6329345703125, "logps_train/policy_2_2": -172.14019775390625, "logps_train/policy_2_w": -261.3819274902344, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": -0.9695316553115845, "rewards_train/1-l": -2.6701340675354004, "rewards_train/1-w": 3.4183480739593506, "rewards_train/2-2": 3.1566824913024902, "rewards_train/2-w": -1.043270230293274, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.088482141494751, "rewards_train/margins_1": 4.387879729270935, "rewards_train/margins_2": 4.199952721595764, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -189.15426635742188, "logps_train/policy_1_l": -147.57693481445312, "logps_train/policy_1_w": -115.23800659179688, "logps_train/policy_2_2": -133.99708557128906, "logps_train/policy_2_w": -153.57476806640625, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -1.0513643026351929, "rewards_train/1-l": -1.2993439435958862, "rewards_train/1-w": 1.5969021320343018, "rewards_train/2-2": 2.279783248901367, "rewards_train/2-w": -0.43794623017311096, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.896246075630188, "rewards_train/margins_1": 2.6482664346694946, "rewards_train/margins_2": 2.717729479074478, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -132.23446655273438, "logps_train/policy_1_l": -114.14059448242188, "logps_train/policy_1_w": -106.32356262207031, "logps_train/policy_2_2": -90.06790161132812, "logps_train/policy_2_w": -154.77899169921875, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -0.7634373307228088, "rewards_train/1-l": -0.7611782550811768, "rewards_train/1-w": 2.535710096359253, "rewards_train/2-2": 1.606735348701477, "rewards_train/2-w": -0.41461771726608276, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.2968883514404297, "rewards_train/margins_1": 3.2991474270820618, "rewards_train/margins_2": 2.02135306596756, "step": 190 }, { "epoch": 0.57, "logps_train/policy_1_2": -116.81370544433594, "logps_train/policy_1_l": -98.72367095947266, "logps_train/policy_1_w": -68.14264678955078, "logps_train/policy_2_2": -76.75944519042969, "logps_train/policy_2_w": -109.39622497558594, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -79.5, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": -0.5544173717498779, "rewards_train/1-l": -1.3493199348449707, "rewards_train/1-w": 1.1578054428100586, "rewards_train/2-2": 1.545637607574463, "rewards_train/2-w": -1.0623767375946045, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.5071253776550293, "rewards_train/margins_1": 1.7122228145599365, "rewards_train/margins_2": 2.6080143451690674, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -189.90386962890625, "logps_train/policy_1_l": -200.08187866210938, "logps_train/policy_1_w": -121.7430191040039, "logps_train/policy_2_2": -139.19277954101562, "logps_train/policy_2_w": -170.24453735351562, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.3458753228187561, "rewards_train/1-l": -1.8804482221603394, "rewards_train/1-w": 2.3826379776000977, "rewards_train/2-2": 2.8077728748321533, "rewards_train/2-w": -0.43612438440322876, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.263086199760437, "rewards_train/margins_1": 2.7285133004188538, "rewards_train/margins_2": 3.243897259235382, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -151.4381866455078, "logps_train/policy_1_l": -122.60121154785156, "logps_train/policy_1_w": -112.490478515625, "logps_train/policy_2_2": -104.67745971679688, "logps_train/policy_2_w": -163.92457580566406, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -0.8860062956809998, "rewards_train/1-l": -1.4973769187927246, "rewards_train/1-w": 2.1197023391723633, "rewards_train/2-2": 1.7763948440551758, "rewards_train/2-w": -0.9076924324035645, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.617079257965088, "rewards_train/margins_1": 3.005708634853363, "rewards_train/margins_2": 2.6840872764587402, "step": 191 }, { "epoch": 0.57, "logps_train/policy_1_2": -149.1572723388672, "logps_train/policy_1_l": -157.4358367919922, "logps_train/policy_1_w": -115.12409973144531, "logps_train/policy_2_2": -103.72309112548828, "logps_train/policy_2_w": -166.0158233642578, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.739946722984314, "rewards_train/1-l": -1.557694435119629, "rewards_train/1-w": 2.1800708770751953, "rewards_train/2-2": 1.9874565601348877, "rewards_train/2-w": -0.6674508452415466, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.737765312194824, "rewards_train/margins_1": 2.9200176000595093, "rewards_train/margins_2": 2.6549074053764343, "step": 191 }, { "epoch": 0.57, "learning_rate": 4.276477432203849e-06, "loss": 1.1002, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -140.96929931640625, "logps_train/policy_1_l": -130.41571044921875, "logps_train/policy_1_w": -108.91327667236328, "logps_train/policy_2_2": -105.43592834472656, "logps_train/policy_2_w": -158.34011840820312, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -0.07505516707897186, "rewards_train/1-l": -1.7413758039474487, "rewards_train/1-w": 2.127422332763672, "rewards_train/2-2": 2.038438558578491, "rewards_train/2-w": -0.858230710029602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8687981367111206, "rewards_train/margins_1": 2.2024774998426437, "rewards_train/margins_2": 2.8966692686080933, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -251.54034423828125, "logps_train/policy_1_l": -202.2588653564453, "logps_train/policy_1_w": -164.06417846679688, "logps_train/policy_2_2": -181.9481658935547, "logps_train/policy_2_w": -245.29324340820312, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -1.6657524108886719, "rewards_train/1-l": -1.2648872137069702, "rewards_train/1-w": 3.1736602783203125, "rewards_train/2-2": 2.5325264930725098, "rewards_train/2-w": -1.6286394596099854, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.438547492027283, "rewards_train/margins_1": 4.839412689208984, "rewards_train/margins_2": 4.161165952682495, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -213.07667541503906, "logps_train/policy_1_l": -201.0314483642578, "logps_train/policy_1_w": -144.58792114257812, "logps_train/policy_2_2": -154.3660888671875, "logps_train/policy_2_w": -189.13150024414062, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.7572776079177856, "rewards_train/1-l": -1.4791204929351807, "rewards_train/1-w": 2.1919898986816406, "rewards_train/2-2": 2.629210948944092, "rewards_train/2-w": -0.2994791567325592, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6711103916168213, "rewards_train/margins_1": 2.9492675065994263, "rewards_train/margins_2": 2.928690105676651, "step": 192 }, { "epoch": 0.57, "logps_train/policy_1_2": -212.53363037109375, "logps_train/policy_1_l": -203.83798217773438, "logps_train/policy_1_w": -98.26309967041016, "logps_train/policy_2_2": -142.67385864257812, "logps_train/policy_2_w": -145.0859832763672, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -1.2271918058395386, "rewards_train/1-l": -1.8867274522781372, "rewards_train/1-w": 1.6075767278671265, "rewards_train/2-2": 2.448629379272461, "rewards_train/2-w": -0.8859418630599976, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.4943041801452637, "rewards_train/margins_1": 2.834768533706665, "rewards_train/margins_2": 3.3345712423324585, "step": 192 }, { "epoch": 0.58, "logps_train/policy_1_2": -237.48898315429688, "logps_train/policy_1_l": -207.51490783691406, "logps_train/policy_1_w": -165.39349365234375, "logps_train/policy_2_2": -163.22579956054688, "logps_train/policy_2_w": -233.62571716308594, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": -1.4668667316436768, "rewards_train/1-l": -2.1010990142822266, "rewards_train/1-w": 2.5325264930725098, "rewards_train/2-2": 2.801637887954712, "rewards_train/2-w": -1.0586665868759155, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.633625507354736, "rewards_train/margins_1": 3.9993932247161865, "rewards_train/margins_2": 3.8603044748306274, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -178.08604431152344, "logps_train/policy_1_l": -211.0630645751953, "logps_train/policy_1_w": -155.43466186523438, "logps_train/policy_2_2": -139.16197204589844, "logps_train/policy_2_w": -229.6265106201172, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": -0.5551621317863464, "rewards_train/1-l": -1.5477855205535889, "rewards_train/1-w": 2.6623926162719727, "rewards_train/2-2": 1.7963522672653198, "rewards_train/2-w": -1.432183027267456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.2101781368255615, "rewards_train/margins_1": 3.217554748058319, "rewards_train/margins_2": 3.228535294532776, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -158.78704833984375, "logps_train/policy_1_l": -200.10501098632812, "logps_train/policy_1_w": -131.97183227539062, "logps_train/policy_2_2": -113.44242858886719, "logps_train/policy_2_w": -184.42306518554688, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -0.33378350734710693, "rewards_train/1-l": -2.0775911808013916, "rewards_train/1-w": 1.7182472944259644, "rewards_train/2-2": 2.1472368240356445, "rewards_train/2-w": -0.9321503639221191, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.795838475227356, "rewards_train/margins_1": 2.0520308017730713, "rewards_train/margins_2": 3.0793871879577637, "step": 193 }, { "epoch": 0.58, "logps_train/policy_1_2": -121.67840576171875, "logps_train/policy_1_l": -85.33270263671875, "logps_train/policy_1_w": -75.35637664794922, "logps_train/policy_2_2": -89.02171325683594, "logps_train/policy_2_w": -114.24796295166016, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": -0.19987118244171143, "rewards_train/1-l": -0.5865904092788696, "rewards_train/1-w": 1.8491278886795044, "rewards_train/2-2": 1.8294687271118164, "rewards_train/2-w": -0.2706947922706604, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.435718297958374, "rewards_train/margins_1": 2.048999071121216, "rewards_train/margins_2": 2.100163519382477, "step": 193 }, { "epoch": 0.58, "learning_rate": 4.259013340731224e-06, "loss": 0.9913, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -140.00424194335938, "logps_train/policy_1_l": -110.16494750976562, "logps_train/policy_1_w": -111.98422241210938, "logps_train/policy_2_2": -93.68524932861328, "logps_train/policy_2_w": -156.729736328125, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.7699552774429321, "rewards_train/1-l": -1.2174718379974365, "rewards_train/1-w": 2.756265163421631, "rewards_train/2-2": 1.952373743057251, "rewards_train/2-w": 0.2551512122154236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9737370014190674, "rewards_train/margins_1": 3.526220440864563, "rewards_train/margins_2": 1.6972225308418274, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -199.78436279296875, "logps_train/policy_1_l": -203.16641235351562, "logps_train/policy_1_w": -172.5622100830078, "logps_train/policy_2_2": -132.5375213623047, "logps_train/policy_2_w": -259.6253967285156, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": -1.415546178817749, "rewards_train/1-l": -1.9453997611999512, "rewards_train/1-w": 3.4175946712493896, "rewards_train/2-2": 2.266169548034668, "rewards_train/2-w": -1.8745028972625732, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.362994432449341, "rewards_train/margins_1": 4.833140850067139, "rewards_train/margins_2": 4.140672445297241, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -157.28749084472656, "logps_train/policy_1_l": -139.3002166748047, "logps_train/policy_1_w": -125.05978393554688, "logps_train/policy_2_2": -107.11019897460938, "logps_train/policy_2_w": -187.57591247558594, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.0629777908325195, "rewards_train/1-l": -1.381925344467163, "rewards_train/1-w": 2.531521797180176, "rewards_train/2-2": 1.8497216701507568, "rewards_train/2-w": -1.2794657945632935, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.913447141647339, "rewards_train/margins_1": 3.5944995880126953, "rewards_train/margins_2": 3.1291874647140503, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -155.9112548828125, "logps_train/policy_1_l": -182.5601806640625, "logps_train/policy_1_w": -110.41838073730469, "logps_train/policy_2_2": -112.90089416503906, "logps_train/policy_2_w": -171.30062866210938, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.65050208568573, "rewards_train/1-l": -1.3690071105957031, "rewards_train/1-w": 2.563239574432373, "rewards_train/2-2": 1.7499487400054932, "rewards_train/2-w": -1.2300633192062378, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.932246685028076, "rewards_train/margins_1": 3.213741660118103, "rewards_train/margins_2": 2.980012059211731, "step": 194 }, { "epoch": 0.58, "logps_train/policy_1_2": -144.11695861816406, "logps_train/policy_1_l": -112.82644653320312, "logps_train/policy_1_w": -99.09501647949219, "logps_train/policy_2_2": -99.137939453125, "logps_train/policy_2_w": -157.889404296875, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.8999763131141663, "rewards_train/1-l": -1.34592604637146, "rewards_train/1-w": 2.3491902351379395, "rewards_train/2-2": 1.5740963220596313, "rewards_train/2-w": -0.986791729927063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.6951162815093994, "rewards_train/margins_1": 3.2491665482521057, "rewards_train/margins_2": 2.5608880519866943, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -132.7571258544922, "logps_train/policy_1_l": -146.8231201171875, "logps_train/policy_1_w": -122.44452667236328, "logps_train/policy_2_2": -100.2829818725586, "logps_train/policy_2_w": -171.87066650390625, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": 0.277412474155426, "rewards_train/1-l": -1.355175256729126, "rewards_train/1-w": 1.9657034873962402, "rewards_train/2-2": 1.807639718055725, "rewards_train/2-w": -0.9538628458976746, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.320878744125366, "rewards_train/margins_1": 1.6882910132408142, "rewards_train/margins_2": 2.7615025639533997, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -193.7890625, "logps_train/policy_1_l": -204.94668579101562, "logps_train/policy_1_w": -117.19454193115234, "logps_train/policy_2_2": -141.50958251953125, "logps_train/policy_2_w": -160.3840789794922, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.6687495708465576, "rewards_train/1-l": -2.562931776046753, "rewards_train/1-w": 2.244413375854492, "rewards_train/2-2": 2.501776933670044, "rewards_train/2-w": -0.136844664812088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.807345151901245, "rewards_train/margins_1": 2.91316294670105, "rewards_train/margins_2": 2.638621598482132, "step": 195 }, { "epoch": 0.58, "logps_train/policy_1_2": -151.88363647460938, "logps_train/policy_1_l": -138.98965454101562, "logps_train/policy_1_w": -132.67434692382812, "logps_train/policy_2_2": -111.58746337890625, "logps_train/policy_2_w": -174.11819458007812, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -0.25867578387260437, "rewards_train/1-l": -1.5732821226119995, "rewards_train/1-w": 2.2212371826171875, "rewards_train/2-2": 2.0560965538024902, "rewards_train/2-w": -0.3266628682613373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.794519305229187, "rewards_train/margins_1": 2.479912966489792, "rewards_train/margins_2": 2.3827594220638275, "step": 195 }, { "epoch": 0.59, "learning_rate": 4.241377572657493e-06, "loss": 1.0345, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -152.08474731445312, "logps_train/policy_1_l": -170.83285522460938, "logps_train/policy_1_w": -118.78926086425781, "logps_train/policy_2_2": -109.42195129394531, "logps_train/policy_2_w": -178.85211181640625, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.12214665859937668, "rewards_train/1-l": -0.8929046392440796, "rewards_train/1-w": 2.136235237121582, "rewards_train/2-2": 2.132023334503174, "rewards_train/2-w": -1.2152409553527832, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0291398763656616, "rewards_train/margins_1": 2.2583818957209587, "rewards_train/margins_2": 3.347264289855957, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -158.8310546875, "logps_train/policy_1_l": -136.3573455810547, "logps_train/policy_1_w": -105.304931640625, "logps_train/policy_2_2": -116.2504653930664, "logps_train/policy_2_w": -145.34280395507812, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -0.4256816506385803, "rewards_train/1-l": -1.3300461769104004, "rewards_train/1-w": 1.4040772914886475, "rewards_train/2-2": 2.2515158653259277, "rewards_train/2-w": -0.6713896989822388, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.734123468399048, "rewards_train/margins_1": 1.8297589421272278, "rewards_train/margins_2": 2.9229055643081665, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -152.70199584960938, "logps_train/policy_1_l": -128.93063354492188, "logps_train/policy_1_w": -95.17646026611328, "logps_train/policy_2_2": -96.49542236328125, "logps_train/policy_2_w": -155.76304626464844, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.0686376094818115, "rewards_train/1-l": -1.6150600910186768, "rewards_train/1-w": 2.259087085723877, "rewards_train/2-2": 1.9348328113555908, "rewards_train/2-w": -0.8310408592224121, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8741471767425537, "rewards_train/margins_1": 3.3277246952056885, "rewards_train/margins_2": 2.765873670578003, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -146.2957000732422, "logps_train/policy_1_l": -94.67782592773438, "logps_train/policy_1_w": -96.14030456542969, "logps_train/policy_2_2": -100.89973449707031, "logps_train/policy_2_w": -140.00332641601562, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -0.3924613893032074, "rewards_train/1-l": -1.1226660013198853, "rewards_train/1-w": 1.9180009365081787, "rewards_train/2-2": 1.8289721012115479, "rewards_train/2-w": -0.43626999855041504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.040666937828064, "rewards_train/margins_1": 2.310462325811386, "rewards_train/margins_2": 2.265242099761963, "step": 196 }, { "epoch": 0.59, "logps_train/policy_1_2": -179.11172485351562, "logps_train/policy_1_l": -147.88372802734375, "logps_train/policy_1_w": -121.08135223388672, "logps_train/policy_2_2": -127.43936157226562, "logps_train/policy_2_w": -165.88771057128906, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -0.5705490112304688, "rewards_train/1-l": -1.274895429611206, "rewards_train/1-w": 2.065498113632202, "rewards_train/2-2": 2.3941500186920166, "rewards_train/2-w": -0.5346691608428955, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.340393543243408, "rewards_train/margins_1": 2.636047124862671, "rewards_train/margins_2": 2.928819179534912, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -145.42581176757812, "logps_train/policy_1_l": -171.2763214111328, "logps_train/policy_1_w": -132.986083984375, "logps_train/policy_2_2": -106.55244445800781, "logps_train/policy_2_w": -187.59765625, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -0.5846207141876221, "rewards_train/1-l": -1.4399361610412598, "rewards_train/1-w": 2.475219249725342, "rewards_train/2-2": 1.6365522146224976, "rewards_train/2-w": -0.7328131794929504, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.9151554107666016, "rewards_train/margins_1": 3.059839963912964, "rewards_train/margins_2": 2.369365394115448, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -202.63014221191406, "logps_train/policy_1_l": -177.912353515625, "logps_train/policy_1_w": -150.70680236816406, "logps_train/policy_2_2": -142.2731475830078, "logps_train/policy_2_w": -223.6630859375, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -0.9524667263031006, "rewards_train/1-l": -1.822486400604248, "rewards_train/1-w": 2.4109599590301514, "rewards_train/2-2": 2.5313761234283447, "rewards_train/2-w": -1.6651360988616943, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.233446359634399, "rewards_train/margins_1": 3.363426685333252, "rewards_train/margins_2": 4.196512222290039, "step": 197 }, { "epoch": 0.59, "logps_train/policy_1_2": -170.86532592773438, "logps_train/policy_1_l": -147.46322631835938, "logps_train/policy_1_w": -108.12181091308594, "logps_train/policy_2_2": -123.53042602539062, "logps_train/policy_2_w": -164.97457885742188, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.7966880798339844, "rewards_train/1-l": -1.270345687866211, "rewards_train/1-w": 1.9604756832122803, "rewards_train/2-2": 1.6338708400726318, "rewards_train/2-w": -1.2349586486816406, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.230821371078491, "rewards_train/margins_1": 2.7571637630462646, "rewards_train/margins_2": 2.8688294887542725, "step": 197 }, { "epoch": 0.59, "learning_rate": 4.22357184920253e-06, "loss": 1.1034, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -178.1253662109375, "logps_train/policy_1_l": -134.89498901367188, "logps_train/policy_1_w": -117.22650909423828, "logps_train/policy_2_2": -131.30947875976562, "logps_train/policy_2_w": -161.297607421875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -0.6338251829147339, "rewards_train/1-l": -1.2144006490707397, "rewards_train/1-w": 1.7047905921936035, "rewards_train/2-2": 2.035360336303711, "rewards_train/2-w": -0.8227288722991943, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9191912412643433, "rewards_train/margins_1": 2.3386157751083374, "rewards_train/margins_2": 2.8580892086029053, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -145.9430389404297, "logps_train/policy_1_l": -154.51788330078125, "logps_train/policy_1_w": -119.84519958496094, "logps_train/policy_2_2": -101.58106994628906, "logps_train/policy_2_w": -169.16294860839844, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.7052416205406189, "rewards_train/1-l": -1.3754942417144775, "rewards_train/1-w": 2.417041778564453, "rewards_train/2-2": 1.5665030479431152, "rewards_train/2-w": -0.30691972374916077, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 3.7925360202789307, "rewards_train/margins_1": 3.122283399105072, "rewards_train/margins_2": 1.873422771692276, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -200.80007934570312, "logps_train/policy_1_l": -225.34371948242188, "logps_train/policy_1_w": -151.3374481201172, "logps_train/policy_2_2": -144.1268768310547, "logps_train/policy_2_w": -229.11244201660156, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.6339136362075806, "rewards_train/1-l": -2.2575151920318604, "rewards_train/1-w": 3.076411485671997, "rewards_train/2-2": 2.698152542114258, "rewards_train/2-w": -1.7217925786972046, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.333926677703857, "rewards_train/margins_1": 3.7103251218795776, "rewards_train/margins_2": 4.419945120811462, "step": 198 }, { "epoch": 0.59, "logps_train/policy_1_2": -160.22244262695312, "logps_train/policy_1_l": -168.11090087890625, "logps_train/policy_1_w": -122.69770812988281, "logps_train/policy_2_2": -108.55305480957031, "logps_train/policy_2_w": -186.21612548828125, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.8886500000953674, "rewards_train/1-l": -1.03120756149292, "rewards_train/1-w": 2.0317916870117188, "rewards_train/2-2": 2.044304370880127, "rewards_train/2-w": -1.6130187511444092, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0629992485046387, "rewards_train/margins_1": 2.920441687107086, "rewards_train/margins_2": 3.657323122024536, "step": 198 }, { "epoch": 0.6, "logps_train/policy_1_2": -141.22657775878906, "logps_train/policy_1_l": -134.64944458007812, "logps_train/policy_1_w": -110.26403045654297, "logps_train/policy_2_2": -95.64651489257812, "logps_train/policy_2_w": -150.65451049804688, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -0.510157585144043, "rewards_train/1-l": -1.6469018459320068, "rewards_train/1-w": 2.1825814247131348, "rewards_train/2-2": 1.9626927375793457, "rewards_train/2-w": -0.12170083820819855, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8294832706451416, "rewards_train/margins_1": 2.6927390098571777, "rewards_train/margins_2": 2.0843935757875443, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -118.0955810546875, "logps_train/policy_1_l": -190.31524658203125, "logps_train/policy_1_w": -74.25529479980469, "logps_train/policy_2_2": -85.50111389160156, "logps_train/policy_2_w": -121.21533203125, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": -0.2806517481803894, "rewards_train/1-l": -2.0975394248962402, "rewards_train/1-w": 1.8012282848358154, "rewards_train/2-2": 1.4850447177886963, "rewards_train/2-w": -0.8318849802017212, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8987677097320557, "rewards_train/margins_1": 2.081880033016205, "rewards_train/margins_2": 2.3169296979904175, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -177.3627471923828, "logps_train/policy_1_l": -143.64990234375, "logps_train/policy_1_w": -100.50267028808594, "logps_train/policy_2_2": -117.60040283203125, "logps_train/policy_2_w": -162.9796142578125, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.5143024921417236, "rewards_train/1-l": -1.0834484100341797, "rewards_train/1-w": 1.9689958095550537, "rewards_train/2-2": 1.969451904296875, "rewards_train/2-w": -1.7371701002120972, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.0524442195892334, "rewards_train/margins_1": 3.4832983016967773, "rewards_train/margins_2": 3.706622004508972, "step": 199 }, { "epoch": 0.6, "logps_train/policy_1_2": -136.04173278808594, "logps_train/policy_1_l": -127.01445770263672, "logps_train/policy_1_w": -102.49608612060547, "logps_train/policy_2_2": -85.77774047851562, "logps_train/policy_2_w": -151.71636962890625, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -1.4588602781295776, "rewards_train/1-l": -0.9763977527618408, "rewards_train/1-w": 1.4037117958068848, "rewards_train/2-2": 1.5814054012298584, "rewards_train/2-w": -1.768902063369751, "rewards_train/accuracies": 0.75, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.3801095485687256, "rewards_train/margins_1": 2.8625720739364624, "rewards_train/margins_2": 3.3503074645996094, "step": 199 }, { "epoch": 0.6, "learning_rate": 4.205597908173555e-06, "loss": 1.106, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -194.52603149414062, "logps_train/policy_1_l": -136.8233642578125, "logps_train/policy_1_w": -142.0133056640625, "logps_train/policy_2_2": -138.53857421875, "logps_train/policy_2_w": -201.1863555908203, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.7322893142700195, "rewards_train/1-l": -1.237608790397644, "rewards_train/1-w": 2.7036008834838867, "rewards_train/2-2": 2.40786075592041, "rewards_train/2-w": -0.8870927691459656, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9412096738815308, "rewards_train/margins_1": 3.4358901977539062, "rewards_train/margins_2": 3.2949535250663757, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -203.79844665527344, "logps_train/policy_1_l": -224.89454650878906, "logps_train/policy_1_w": -135.48789978027344, "logps_train/policy_2_2": -141.17074584960938, "logps_train/policy_2_w": -202.02488708496094, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.8946887850761414, "rewards_train/1-l": -2.0855486392974854, "rewards_train/1-w": 2.362734317779541, "rewards_train/2-2": 2.776674747467041, "rewards_train/2-w": -1.217429518699646, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.448282957077026, "rewards_train/margins_1": 3.2574231028556824, "rewards_train/margins_2": 3.994104266166687, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -215.55429077148438, "logps_train/policy_1_l": -224.7869873046875, "logps_train/policy_1_w": -134.64019775390625, "logps_train/policy_2_2": -138.67208862304688, "logps_train/policy_2_w": -208.58547973632812, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -1.4296491146087646, "rewards_train/1-l": -2.841320753097534, "rewards_train/1-w": 2.825824737548828, "rewards_train/2-2": 2.95163893699646, "rewards_train/2-w": -0.9616737365722656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.667145490646362, "rewards_train/margins_1": 4.255473852157593, "rewards_train/margins_2": 3.9133126735687256, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -139.2686767578125, "logps_train/policy_1_l": -144.53341674804688, "logps_train/policy_1_w": -81.73843383789062, "logps_train/policy_2_2": -100.03886413574219, "logps_train/policy_2_w": -113.53329467773438, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": -0.3088997006416321, "rewards_train/1-l": -1.3105933666229248, "rewards_train/1-w": 1.4134126901626587, "rewards_train/2-2": 2.051973342895508, "rewards_train/2-w": -0.34004801511764526, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.7240060567855835, "rewards_train/margins_1": 1.7223123908042908, "rewards_train/margins_2": 2.392021358013153, "step": 200 }, { "epoch": 0.6, "logps_train/policy_1_2": -167.36911010742188, "logps_train/policy_1_l": -215.0551300048828, "logps_train/policy_1_w": -154.9891357421875, "logps_train/policy_2_2": -111.507080078125, "logps_train/policy_2_w": -220.95843505859375, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -1.038278579711914, "rewards_train/1-l": -2.164302349090576, "rewards_train/1-w": 2.4917118549346924, "rewards_train/2-2": 1.9375733137130737, "rewards_train/2-w": -1.0477975606918335, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.6560142040252686, "rewards_train/margins_1": 3.5299904346466064, "rewards_train/margins_2": 2.9853708744049072, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -182.45053100585938, "logps_train/policy_1_l": -189.77301025390625, "logps_train/policy_1_w": -134.2057647705078, "logps_train/policy_2_2": -129.191650390625, "logps_train/policy_2_w": -198.56744384765625, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.26575547456741333, "rewards_train/1-l": -1.4268617630004883, "rewards_train/1-w": 2.6983695030212402, "rewards_train/2-2": 2.7657952308654785, "rewards_train/2-w": -1.2102597951889038, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.1252312660217285, "rewards_train/margins_1": 2.9641249775886536, "rewards_train/margins_2": 3.9760550260543823, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -155.9585723876953, "logps_train/policy_1_l": -120.58817291259766, "logps_train/policy_1_w": -94.53750610351562, "logps_train/policy_2_2": -106.27294158935547, "logps_train/policy_2_w": -147.80459594726562, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.9899982213973999, "rewards_train/1-l": -1.3783485889434814, "rewards_train/1-w": 2.25713849067688, "rewards_train/2-2": 2.123487710952759, "rewards_train/2-w": -0.8072178959846497, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.6354870796203613, "rewards_train/margins_1": 3.24713671207428, "rewards_train/margins_2": 2.9307056069374084, "step": 201 }, { "epoch": 0.6, "logps_train/policy_1_2": -172.3436279296875, "logps_train/policy_1_l": -138.5262451171875, "logps_train/policy_1_w": -165.9449462890625, "logps_train/policy_2_2": -123.77462768554688, "logps_train/policy_2_w": -232.66766357421875, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -0.8612174987792969, "rewards_train/1-l": -1.2576043605804443, "rewards_train/1-w": 2.6896839141845703, "rewards_train/2-2": 2.0374794006347656, "rewards_train/2-w": -1.3380558490753174, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9472882747650146, "rewards_train/margins_1": 3.550901412963867, "rewards_train/margins_2": 3.375535249710083, "step": 201 }, { "epoch": 0.6, "learning_rate": 4.187457503795526e-06, "loss": 0.9848, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -152.94384765625, "logps_train/policy_1_l": -124.98796081542969, "logps_train/policy_1_w": -83.82866668701172, "logps_train/policy_2_2": -115.68536376953125, "logps_train/policy_2_w": -117.20834350585938, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -0.00991174578666687, "rewards_train/1-l": -1.6347336769104004, "rewards_train/1-w": 1.609320878982544, "rewards_train/2-2": 2.1558780670166016, "rewards_train/2-w": -0.22513151168823242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.2440545558929443, "rewards_train/margins_1": 1.6192326247692108, "rewards_train/margins_2": 2.381009578704834, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -133.3553466796875, "logps_train/policy_1_l": -136.82208251953125, "logps_train/policy_1_w": -109.11097717285156, "logps_train/policy_2_2": -93.9619369506836, "logps_train/policy_2_w": -146.4114990234375, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.39744865894317627, "rewards_train/1-l": -1.2722477912902832, "rewards_train/1-w": 1.647740364074707, "rewards_train/2-2": 1.6027567386627197, "rewards_train/2-w": -0.5331412553787231, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9199881553649902, "rewards_train/margins_1": 2.0451890230178833, "rewards_train/margins_2": 2.135897994041443, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -207.78045654296875, "logps_train/policy_1_l": -188.15774536132812, "logps_train/policy_1_w": -128.11549377441406, "logps_train/policy_2_2": -147.41342163085938, "logps_train/policy_2_w": -193.33285522460938, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.1585142612457275, "rewards_train/1-l": -1.5038609504699707, "rewards_train/1-w": 2.213744640350342, "rewards_train/2-2": 2.406315326690674, "rewards_train/2-w": -1.3731305599212646, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7176055908203125, "rewards_train/margins_1": 3.3722589015960693, "rewards_train/margins_2": 3.7794458866119385, "step": 202 }, { "epoch": 0.6, "logps_train/policy_1_2": -155.00949096679688, "logps_train/policy_1_l": -172.5868377685547, "logps_train/policy_1_w": -168.7401885986328, "logps_train/policy_2_2": -111.58721923828125, "logps_train/policy_2_w": -238.35311889648438, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": -0.635324239730835, "rewards_train/1-l": -1.5751019716262817, "rewards_train/1-w": 2.817777633666992, "rewards_train/2-2": 1.5994817018508911, "rewards_train/2-w": -1.4685157537460327, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.392879605293274, "rewards_train/margins_1": 3.453101873397827, "rewards_train/margins_2": 3.067997455596924, "step": 202 }, { "epoch": 0.61, "logps_train/policy_1_2": -207.04437255859375, "logps_train/policy_1_l": -202.986083984375, "logps_train/policy_1_w": -156.72579956054688, "logps_train/policy_2_2": -139.93911743164062, "logps_train/policy_2_w": -233.7130126953125, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -1.3474050760269165, "rewards_train/1-l": -1.434107780456543, "rewards_train/1-w": 3.166872978210449, "rewards_train/2-2": 2.5431973934173584, "rewards_train/2-w": -1.2056753635406494, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.600980758666992, "rewards_train/margins_1": 4.514278054237366, "rewards_train/margins_2": 3.748872756958008, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -154.69375610351562, "logps_train/policy_1_l": -178.691650390625, "logps_train/policy_1_w": -116.17535400390625, "logps_train/policy_2_2": -100.40380096435547, "logps_train/policy_2_w": -181.739501953125, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.0525788068771362, "rewards_train/1-l": -1.6669199466705322, "rewards_train/1-w": 2.4258244037628174, "rewards_train/2-2": 1.8822762966156006, "rewards_train/2-w": -1.3962154388427734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.09274435043335, "rewards_train/margins_1": 3.4784032106399536, "rewards_train/margins_2": 3.278491735458374, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -210.74366760253906, "logps_train/policy_1_l": -167.02525329589844, "logps_train/policy_1_w": -146.21376037597656, "logps_train/policy_2_2": -163.26846313476562, "logps_train/policy_2_w": -196.53489685058594, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.35014769434928894, "rewards_train/1-l": -1.82776939868927, "rewards_train/1-w": 2.4354355335235596, "rewards_train/2-2": 2.356748104095459, "rewards_train/2-w": -0.4185052216053009, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.26320493221283, "rewards_train/margins_1": 2.7855832278728485, "rewards_train/margins_2": 2.77525332570076, "step": 203 }, { "epoch": 0.61, "logps_train/policy_1_2": -148.26988220214844, "logps_train/policy_1_l": -167.45242309570312, "logps_train/policy_1_w": -119.11177062988281, "logps_train/policy_2_2": -107.75581359863281, "logps_train/policy_2_w": -167.79595947265625, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.7856800556182861, "rewards_train/1-l": -2.617994785308838, "rewards_train/1-w": 2.0356979370117188, "rewards_train/2-2": 1.5212942361831665, "rewards_train/2-w": -0.7276424169540405, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.653692722320557, "rewards_train/margins_1": 2.821377992630005, "rewards_train/margins_2": 2.248936653137207, "step": 203 }, { "epoch": 0.61, "learning_rate": 4.169152406539933e-06, "loss": 1.027, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -160.98635864257812, "logps_train/policy_1_l": -141.77017211914062, "logps_train/policy_1_w": -112.200927734375, "logps_train/policy_2_2": -115.44514465332031, "logps_train/policy_2_w": -163.03057861328125, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -0.5314480662345886, "rewards_train/1-l": -1.9707672595977783, "rewards_train/1-w": 1.8738526105880737, "rewards_train/2-2": 2.2990403175354004, "rewards_train/2-w": -0.7661452889442444, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.844619870185852, "rewards_train/margins_1": 2.4053006768226624, "rewards_train/margins_2": 3.0651856064796448, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -152.38412475585938, "logps_train/policy_1_l": -103.43618774414062, "logps_train/policy_1_w": -115.71076202392578, "logps_train/policy_2_2": -114.50646209716797, "logps_train/policy_2_w": -162.62197875976562, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": 0.31788673996925354, "rewards_train/1-l": -1.1557285785675049, "rewards_train/1-w": 1.9877128601074219, "rewards_train/2-2": 2.2441294193267822, "rewards_train/2-w": -0.8817293047904968, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.1434414386749268, "rewards_train/margins_1": 1.6698261201381683, "rewards_train/margins_2": 3.125858724117279, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -164.1367950439453, "logps_train/policy_1_l": -142.81597900390625, "logps_train/policy_1_w": -139.4429473876953, "logps_train/policy_2_2": -106.44454956054688, "logps_train/policy_2_w": -199.55569458007812, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -1.1281323432922363, "rewards_train/1-l": -1.2119935750961304, "rewards_train/1-w": 1.9111984968185425, "rewards_train/2-2": 1.9446076154708862, "rewards_train/2-w": -1.4155305624008179, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.123192071914673, "rewards_train/margins_1": 3.039330840110779, "rewards_train/margins_2": 3.360138177871704, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -217.24818420410156, "logps_train/policy_1_l": -201.08299255371094, "logps_train/policy_1_w": -134.04209899902344, "logps_train/policy_2_2": -150.4605255126953, "logps_train/policy_2_w": -212.2011260986328, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.6657360792160034, "rewards_train/1-l": -2.850877523422241, "rewards_train/1-w": 2.224012851715088, "rewards_train/2-2": 2.2483325004577637, "rewards_train/2-w": -2.4729933738708496, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.074890375137329, "rewards_train/margins_1": 3.8897489309310913, "rewards_train/margins_2": 4.721325874328613, "step": 204 }, { "epoch": 0.61, "logps_train/policy_1_2": -187.36753845214844, "logps_train/policy_1_l": -173.08010864257812, "logps_train/policy_1_w": -96.03358459472656, "logps_train/policy_2_2": -128.14743041992188, "logps_train/policy_2_w": -150.8116912841797, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.245347499847412, "rewards_train/1-l": -1.806678056716919, "rewards_train/1-w": 1.522129774093628, "rewards_train/2-2": 2.0346715450286865, "rewards_train/2-w": -1.0777506828308105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.328807830810547, "rewards_train/margins_1": 2.76747727394104, "rewards_train/margins_2": 3.112422227859497, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -184.55718994140625, "logps_train/policy_1_l": -137.83544921875, "logps_train/policy_1_w": -112.98298645019531, "logps_train/policy_2_2": -129.48294067382812, "logps_train/policy_2_w": -165.07958984375, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.0135314464569092, "rewards_train/1-l": -1.378028154373169, "rewards_train/1-w": 1.8441330194473267, "rewards_train/2-2": 2.440767288208008, "rewards_train/2-w": -1.211082935333252, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2221611738204956, "rewards_train/margins_1": 2.857664465904236, "rewards_train/margins_2": 3.6518502235412598, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -212.0953369140625, "logps_train/policy_1_l": -152.88473510742188, "logps_train/policy_1_w": -152.31947326660156, "logps_train/policy_2_2": -153.60340881347656, "logps_train/policy_2_w": -209.228759765625, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.3275035619735718, "rewards_train/1-l": -1.6059784889221191, "rewards_train/1-w": 2.3383657932281494, "rewards_train/2-2": 2.323643684387207, "rewards_train/2-w": -1.3502211570739746, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9443442821502686, "rewards_train/margins_1": 3.665869355201721, "rewards_train/margins_2": 3.6738648414611816, "step": 205 }, { "epoch": 0.61, "logps_train/policy_1_2": -163.04901123046875, "logps_train/policy_1_l": -164.22671508789062, "logps_train/policy_1_w": -105.94401550292969, "logps_train/policy_2_2": -119.21513366699219, "logps_train/policy_2_w": -148.90016174316406, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.6783396005630493, "rewards_train/1-l": -1.4089988470077515, "rewards_train/1-w": 1.887629747390747, "rewards_train/2-2": 1.9792672395706177, "rewards_train/2-w": -0.4611097574234009, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2966285943984985, "rewards_train/margins_1": 2.5659693479537964, "rewards_train/margins_2": 2.4403769969940186, "step": 205 }, { "epoch": 0.62, "learning_rate": 4.150684402951994e-06, "loss": 1.0824, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -149.62606811523438, "logps_train/policy_1_l": -105.00066375732422, "logps_train/policy_1_w": -89.706787109375, "logps_train/policy_2_2": -103.36900329589844, "logps_train/policy_2_w": -139.06690979003906, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -0.7766198515892029, "rewards_train/1-l": -1.1361013650894165, "rewards_train/1-w": 1.8347904682159424, "rewards_train/2-2": 1.7098281383514404, "rewards_train/2-w": -1.0230964422225952, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.970891833305359, "rewards_train/margins_1": 2.6114103198051453, "rewards_train/margins_2": 2.7329245805740356, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -139.66172790527344, "logps_train/policy_1_l": -162.65142822265625, "logps_train/policy_1_w": -122.69025421142578, "logps_train/policy_2_2": -94.70179748535156, "logps_train/policy_2_w": -168.07647705078125, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.6735951900482178, "rewards_train/1-l": -1.9377024173736572, "rewards_train/1-w": 1.8920094966888428, "rewards_train/2-2": 2.1313822269439697, "rewards_train/2-w": -0.9630192518234253, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8297119140625, "rewards_train/margins_1": 2.5656046867370605, "rewards_train/margins_2": 3.094401478767395, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -146.41275024414062, "logps_train/policy_1_l": -162.27682495117188, "logps_train/policy_1_w": -155.15655517578125, "logps_train/policy_2_2": -96.66329956054688, "logps_train/policy_2_w": -241.50547790527344, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.076040267944336, "rewards_train/1-l": -1.6802698373794556, "rewards_train/1-w": 2.1071460247039795, "rewards_train/2-2": 1.8959746360778809, "rewards_train/2-w": -2.3703718185424805, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.787415862083435, "rewards_train/margins_1": 3.1831862926483154, "rewards_train/margins_2": 4.266346454620361, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -160.16986083984375, "logps_train/policy_1_l": -128.78675842285156, "logps_train/policy_1_w": -107.21011352539062, "logps_train/policy_2_2": -116.86669921875, "logps_train/policy_2_w": -156.89227294921875, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -0.30448654294013977, "rewards_train/1-l": -0.7596200108528137, "rewards_train/1-w": 1.7257295846939087, "rewards_train/2-2": 2.2723145484924316, "rewards_train/2-w": -0.7599307894706726, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.4853495955467224, "rewards_train/margins_1": 2.0302161276340485, "rewards_train/margins_2": 3.0322453379631042, "step": 206 }, { "epoch": 0.62, "logps_train/policy_1_2": -151.87081909179688, "logps_train/policy_1_l": -145.26914978027344, "logps_train/policy_1_w": -134.15631103515625, "logps_train/policy_2_2": -94.25056457519531, "logps_train/policy_2_w": -194.7057647705078, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.2956749200820923, "rewards_train/1-l": -1.0160508155822754, "rewards_train/1-w": 1.334368348121643, "rewards_train/2-2": 2.1319751739501953, "rewards_train/2-w": -2.2744829654693604, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.3504191637039185, "rewards_train/margins_1": 2.6300432682037354, "rewards_train/margins_2": 4.406458139419556, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -133.12728881835938, "logps_train/policy_1_l": -117.88733673095703, "logps_train/policy_1_w": -119.76898193359375, "logps_train/policy_2_2": -82.16417694091797, "logps_train/policy_2_w": -185.50267028808594, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -1.2670252323150635, "rewards_train/1-l": -1.439661979675293, "rewards_train/1-w": 2.400054454803467, "rewards_train/2-2": 1.6848522424697876, "rewards_train/2-w": -1.849877119064331, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8397164344787598, "rewards_train/margins_1": 3.6670796871185303, "rewards_train/margins_2": 3.5347293615341187, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -183.80714416503906, "logps_train/policy_1_l": -142.12777709960938, "logps_train/policy_1_w": -127.07736206054688, "logps_train/policy_2_2": -127.63005828857422, "logps_train/policy_2_w": -179.64231872558594, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -0.7920428514480591, "rewards_train/1-l": -1.7583835124969482, "rewards_train/1-w": 2.0879673957824707, "rewards_train/2-2": 2.618244171142578, "rewards_train/2-w": -1.2337626218795776, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.846350908279419, "rewards_train/margins_1": 2.88001024723053, "rewards_train/margins_2": 3.8520067930221558, "step": 207 }, { "epoch": 0.62, "logps_train/policy_1_2": -194.77554321289062, "logps_train/policy_1_l": -160.924560546875, "logps_train/policy_1_w": -154.6798553466797, "logps_train/policy_2_2": -132.00851440429688, "logps_train/policy_2_w": -234.7266387939453, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -1.9990389347076416, "rewards_train/1-l": -1.375610113143921, "rewards_train/1-w": 2.3757641315460205, "rewards_train/2-2": 2.1345009803771973, "rewards_train/2-w": -2.261725902557373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7513742446899414, "rewards_train/margins_1": 4.374803066253662, "rewards_train/margins_2": 4.39622688293457, "step": 207 }, { "epoch": 0.62, "learning_rate": 4.132055295476304e-06, "loss": 1.102, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -181.8159637451172, "logps_train/policy_1_l": -176.09169006347656, "logps_train/policy_1_w": -118.47187042236328, "logps_train/policy_2_2": -119.53937530517578, "logps_train/policy_2_w": -181.6510009765625, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.673978567123413, "rewards_train/1-l": -1.346816062927246, "rewards_train/1-w": 2.0782034397125244, "rewards_train/2-2": 2.2080743312835693, "rewards_train/2-w": -1.628771185874939, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.4250195026397705, "rewards_train/margins_1": 3.7521820068359375, "rewards_train/margins_2": 3.8368455171585083, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -194.88589477539062, "logps_train/policy_1_l": -229.08065795898438, "logps_train/policy_1_w": -170.01339721679688, "logps_train/policy_2_2": -151.99578857421875, "logps_train/policy_2_w": -223.78457641601562, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -0.5950356721878052, "rewards_train/1-l": -1.7830662727355957, "rewards_train/1-w": 2.598268747329712, "rewards_train/2-2": 1.8116520643234253, "rewards_train/2-w": -0.5056068897247314, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.381335020065308, "rewards_train/margins_1": 3.193304419517517, "rewards_train/margins_2": 2.3172589540481567, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -159.449951171875, "logps_train/policy_1_l": -127.76029205322266, "logps_train/policy_1_w": -63.473262786865234, "logps_train/policy_2_2": -100.87728881835938, "logps_train/policy_2_w": -97.82965087890625, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -81.5, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": -1.8059327602386475, "rewards_train/1-l": -1.8006210327148438, "rewards_train/1-w": 1.8245668411254883, "rewards_train/2-2": 2.0605618953704834, "rewards_train/2-w": -0.2651920020580292, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.625187873840332, "rewards_train/margins_1": 3.6304996013641357, "rewards_train/margins_2": 2.3257538974285126, "step": 208 }, { "epoch": 0.62, "logps_train/policy_1_2": -202.2833709716797, "logps_train/policy_1_l": -169.9805145263672, "logps_train/policy_1_w": -124.59638977050781, "logps_train/policy_2_2": -132.35650634765625, "logps_train/policy_2_w": -190.25094604492188, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -2.3002121448516846, "rewards_train/1-l": -2.1324262619018555, "rewards_train/1-w": 2.215947151184082, "rewards_train/2-2": 2.262786626815796, "rewards_train/2-w": -1.9192348718643188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.3483734130859375, "rewards_train/margins_1": 4.516159296035767, "rewards_train/margins_2": 4.182021498680115, "step": 208 }, { "epoch": 0.63, "logps_train/policy_1_2": -162.4464111328125, "logps_train/policy_1_l": -194.90896606445312, "logps_train/policy_1_w": -114.36937713623047, "logps_train/policy_2_2": -109.06604766845703, "logps_train/policy_2_w": -177.44589233398438, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.6977663040161133, "rewards_train/1-l": -1.9152374267578125, "rewards_train/1-w": 2.24899959564209, "rewards_train/2-2": 1.9051145315170288, "rewards_train/2-w": -1.1850186586380005, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.164237022399902, "rewards_train/margins_1": 2.946765899658203, "rewards_train/margins_2": 3.0901331901550293, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -210.59039306640625, "logps_train/policy_1_l": -179.34619140625, "logps_train/policy_1_w": -169.63925170898438, "logps_train/policy_2_2": -145.69708251953125, "logps_train/policy_2_w": -241.8563995361328, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": -1.906818151473999, "rewards_train/1-l": -1.5785629749298096, "rewards_train/1-w": 2.9716219902038574, "rewards_train/2-2": 2.210808515548706, "rewards_train/2-w": -1.3500938415527344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.550184965133667, "rewards_train/margins_1": 4.8784401416778564, "rewards_train/margins_2": 3.5609023571014404, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -167.42666625976562, "logps_train/policy_1_l": -156.21051025390625, "logps_train/policy_1_w": -137.87826538085938, "logps_train/policy_2_2": -119.21917724609375, "logps_train/policy_2_w": -201.677734375, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.2657127380371094, "rewards_train/1-l": -1.3755431175231934, "rewards_train/1-w": 2.473891258239746, "rewards_train/2-2": 1.3950746059417725, "rewards_train/2-w": -1.0677725076675415, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8494343757629395, "rewards_train/margins_1": 3.7396039962768555, "rewards_train/margins_2": 2.462847113609314, "step": 209 }, { "epoch": 0.63, "logps_train/policy_1_2": -191.26124572753906, "logps_train/policy_1_l": -153.644287109375, "logps_train/policy_1_w": -121.40162658691406, "logps_train/policy_2_2": -136.39675903320312, "logps_train/policy_2_w": -168.4656524658203, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.31792134046554565, "rewards_train/1-l": -1.790600061416626, "rewards_train/1-w": 1.9793686866760254, "rewards_train/2-2": 3.051339626312256, "rewards_train/2-w": -1.0332849025726318, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7699687480926514, "rewards_train/margins_1": 2.297290027141571, "rewards_train/margins_2": 4.084624528884888, "step": 209 }, { "epoch": 0.63, "learning_rate": 4.113266902280914e-06, "loss": 1.0382, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -177.19705200195312, "logps_train/policy_1_l": -194.6887969970703, "logps_train/policy_1_w": -148.39340209960938, "logps_train/policy_2_2": -122.84671020507812, "logps_train/policy_2_w": -216.26681518554688, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -0.7230254411697388, "rewards_train/1-l": -1.4137043952941895, "rewards_train/1-w": 2.852849006652832, "rewards_train/2-2": 2.284372091293335, "rewards_train/2-w": -1.2309777736663818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.2665534019470215, "rewards_train/margins_1": 3.575874447822571, "rewards_train/margins_2": 3.515349864959717, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -191.29762268066406, "logps_train/policy_1_l": -187.50830078125, "logps_train/policy_1_w": -155.38604736328125, "logps_train/policy_2_2": -137.93809509277344, "logps_train/policy_2_w": -218.94393920898438, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -0.9680432677268982, "rewards_train/1-l": -1.5242685079574585, "rewards_train/1-w": 2.8192074298858643, "rewards_train/2-2": 1.9163469076156616, "rewards_train/2-w": -1.0701760053634644, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.343475937843323, "rewards_train/margins_1": 3.7872506976127625, "rewards_train/margins_2": 2.986522912979126, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -192.25357055664062, "logps_train/policy_1_l": -177.23782348632812, "logps_train/policy_1_w": -139.8364715576172, "logps_train/policy_2_2": -124.87895965576172, "logps_train/policy_2_w": -224.1104278564453, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.9355137348175049, "rewards_train/1-l": -1.7687032222747803, "rewards_train/1-w": 2.627290725708008, "rewards_train/2-2": 2.3312439918518066, "rewards_train/2-w": -2.0430731773376465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.395993947982788, "rewards_train/margins_1": 4.562804460525513, "rewards_train/margins_2": 4.374317169189453, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -155.7578125, "logps_train/policy_1_l": -174.44918823242188, "logps_train/policy_1_w": -127.33149719238281, "logps_train/policy_2_2": -108.13182830810547, "logps_train/policy_2_w": -169.55935668945312, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.9132825136184692, "rewards_train/1-l": -1.9337866306304932, "rewards_train/1-w": 1.5262250900268555, "rewards_train/2-2": 1.535254716873169, "rewards_train/2-w": -0.6067180037498474, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4600117206573486, "rewards_train/margins_1": 2.4395076036453247, "rewards_train/margins_2": 2.1419727206230164, "step": 210 }, { "epoch": 0.63, "logps_train/policy_1_2": -158.50082397460938, "logps_train/policy_1_l": -136.7720947265625, "logps_train/policy_1_w": -108.07247924804688, "logps_train/policy_2_2": -111.60287475585938, "logps_train/policy_2_w": -162.02906799316406, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -0.9190273284912109, "rewards_train/1-l": -1.4395134449005127, "rewards_train/1-w": 2.05173659324646, "rewards_train/2-2": 1.8865878582000732, "rewards_train/2-w": -1.2118912935256958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.4912500381469727, "rewards_train/margins_1": 2.970763921737671, "rewards_train/margins_2": 3.098479151725769, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -143.92849731445312, "logps_train/policy_1_l": -133.49032592773438, "logps_train/policy_1_w": -127.88160705566406, "logps_train/policy_2_2": -105.09605407714844, "logps_train/policy_2_w": -182.34909057617188, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.48464658856391907, "rewards_train/1-l": -0.798154354095459, "rewards_train/1-w": 1.9294168949127197, "rewards_train/2-2": 1.6771135330200195, "rewards_train/2-w": -1.2517051696777344, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.7275712490081787, "rewards_train/margins_1": 2.414063483476639, "rewards_train/margins_2": 2.928818702697754, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -150.15914916992188, "logps_train/policy_1_l": -113.39901733398438, "logps_train/policy_1_w": -107.5659408569336, "logps_train/policy_2_2": -107.66162109375, "logps_train/policy_2_w": -149.5601806640625, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -0.7174776792526245, "rewards_train/1-l": -1.2320888042449951, "rewards_train/1-w": 1.9392063617706299, "rewards_train/2-2": 1.7530765533447266, "rewards_train/2-w": -0.6042614579200745, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.171295166015625, "rewards_train/margins_1": 2.6566840410232544, "rewards_train/margins_2": 2.357338011264801, "step": 211 }, { "epoch": 0.63, "logps_train/policy_1_2": -206.64466857910156, "logps_train/policy_1_l": -176.85855102539062, "logps_train/policy_1_w": -135.9224853515625, "logps_train/policy_2_2": -140.30348205566406, "logps_train/policy_2_w": -202.75933837890625, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.0339984893798828, "rewards_train/1-l": -1.3833649158477783, "rewards_train/1-w": 2.628063678741455, "rewards_train/2-2": 2.4481678009033203, "rewards_train/2-w": -1.592926025390625, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.011428594589233, "rewards_train/margins_1": 3.662062168121338, "rewards_train/margins_2": 4.041093826293945, "step": 211 }, { "epoch": 0.63, "learning_rate": 4.094321057079874e-06, "loss": 1.0513, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -132.0124053955078, "logps_train/policy_1_l": -141.17929077148438, "logps_train/policy_1_w": -99.76483154296875, "logps_train/policy_2_2": -84.09818267822266, "logps_train/policy_2_w": -164.13906860351562, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.0907909870147705, "rewards_train/1-l": -1.2197109460830688, "rewards_train/1-w": 1.9918758869171143, "rewards_train/2-2": 1.8478721380233765, "rewards_train/2-w": -2.112002372741699, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.211586833000183, "rewards_train/margins_1": 3.0826668739318848, "rewards_train/margins_2": 3.9598745107650757, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -203.57252502441406, "logps_train/policy_1_l": -142.37596130371094, "logps_train/policy_1_w": -114.01240539550781, "logps_train/policy_2_2": -138.04718017578125, "logps_train/policy_2_w": -169.26461791992188, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.6459238529205322, "rewards_train/1-l": -2.1633782386779785, "rewards_train/1-w": 2.3296194076538086, "rewards_train/2-2": 2.663300037384033, "rewards_train/2-w": -1.120992660522461, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.492997646331787, "rewards_train/margins_1": 3.975543260574341, "rewards_train/margins_2": 3.784292697906494, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -170.490966796875, "logps_train/policy_1_l": -125.92102813720703, "logps_train/policy_1_w": -109.03562927246094, "logps_train/policy_2_2": -109.94866180419922, "logps_train/policy_2_w": -182.113525390625, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -2.012572765350342, "rewards_train/1-l": -1.4749152660369873, "rewards_train/1-w": 2.328711986541748, "rewards_train/2-2": 1.6963447332382202, "rewards_train/2-w": -2.1867430210113525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8036272525787354, "rewards_train/margins_1": 4.34128475189209, "rewards_train/margins_2": 3.8830877542495728, "step": 212 }, { "epoch": 0.63, "logps_train/policy_1_2": -171.920166015625, "logps_train/policy_1_l": -113.20892333984375, "logps_train/policy_1_w": -100.23517608642578, "logps_train/policy_2_2": -113.71205139160156, "logps_train/policy_2_w": -147.98117065429688, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -1.4538336992263794, "rewards_train/1-l": -1.2862484455108643, "rewards_train/1-w": 1.619426965713501, "rewards_train/2-2": 2.2616074085235596, "rewards_train/2-w": -1.1343485116958618, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9056754112243652, "rewards_train/margins_1": 3.0732606649398804, "rewards_train/margins_2": 3.3959559202194214, "step": 212 }, { "epoch": 0.64, "logps_train/policy_1_2": -227.31219482421875, "logps_train/policy_1_l": -169.58349609375, "logps_train/policy_1_w": -111.38936614990234, "logps_train/policy_2_2": -156.0738525390625, "logps_train/policy_2_w": -175.13893127441406, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.6093443632125854, "rewards_train/1-l": -1.9331550598144531, "rewards_train/1-w": 2.3828413486480713, "rewards_train/2-2": 2.7496471405029297, "rewards_train/2-w": -1.3231700658798218, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.315996408462524, "rewards_train/margins_1": 3.9921857118606567, "rewards_train/margins_2": 4.0728172063827515, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -176.2301025390625, "logps_train/policy_1_l": -186.60658264160156, "logps_train/policy_1_w": -158.39208984375, "logps_train/policy_2_2": -122.76957702636719, "logps_train/policy_2_w": -242.20925903320312, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": -0.9667596220970154, "rewards_train/1-l": -1.9743294715881348, "rewards_train/1-w": 3.2708487510681152, "rewards_train/2-2": 2.079292058944702, "rewards_train/2-w": -1.5002225637435913, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.24517822265625, "rewards_train/margins_1": 4.237608373165131, "rewards_train/margins_2": 3.5795146226882935, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -200.2860870361328, "logps_train/policy_1_l": -161.041748046875, "logps_train/policy_1_w": -140.71823120117188, "logps_train/policy_2_2": -147.55801391601562, "logps_train/policy_2_w": -195.25491333007812, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.8766553401947021, "rewards_train/1-l": -1.1385492086410522, "rewards_train/1-w": 2.6133317947387695, "rewards_train/2-2": 2.4961516857147217, "rewards_train/2-w": -0.457912802696228, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7518810033798218, "rewards_train/margins_1": 3.4899871349334717, "rewards_train/margins_2": 2.9540644884109497, "step": 213 }, { "epoch": 0.64, "logps_train/policy_1_2": -180.87191772460938, "logps_train/policy_1_l": -133.68707275390625, "logps_train/policy_1_w": -97.61498260498047, "logps_train/policy_2_2": -128.15771484375, "logps_train/policy_2_w": -145.94931030273438, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -1.1012537479400635, "rewards_train/1-l": -0.9394580125808716, "rewards_train/1-w": 1.783277153968811, "rewards_train/2-2": 2.1814942359924316, "rewards_train/2-w": -0.9833086729049683, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.7227351665496826, "rewards_train/margins_1": 2.8845309019088745, "rewards_train/margins_2": 3.1648029088974, "step": 213 }, { "epoch": 0.64, "learning_rate": 4.075219608954279e-06, "loss": 0.956, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -195.50436401367188, "logps_train/policy_1_l": -199.95318603515625, "logps_train/policy_1_w": -122.5846176147461, "logps_train/policy_2_2": -122.93040466308594, "logps_train/policy_2_w": -189.14453125, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -2.031687021255493, "rewards_train/1-l": -2.527644157409668, "rewards_train/1-w": 2.2780613899230957, "rewards_train/2-2": 2.4120378494262695, "rewards_train/2-w": -1.7204091548919678, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.805705547332764, "rewards_train/margins_1": 4.309748411178589, "rewards_train/margins_2": 4.132447004318237, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -200.96829223632812, "logps_train/policy_1_l": -152.98643493652344, "logps_train/policy_1_w": -123.63907623291016, "logps_train/policy_2_2": -138.99447631835938, "logps_train/policy_2_w": -183.04876708984375, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.4890174865722656, "rewards_train/1-l": -1.418271780014038, "rewards_train/1-w": 2.0007400512695312, "rewards_train/2-2": 2.3193016052246094, "rewards_train/2-w": -1.4159116744995117, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4190118312835693, "rewards_train/margins_1": 3.489757537841797, "rewards_train/margins_2": 3.735213279724121, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -232.11932373046875, "logps_train/policy_1_l": -207.73202514648438, "logps_train/policy_1_w": -157.7526397705078, "logps_train/policy_2_2": -148.39486694335938, "logps_train/policy_2_w": -232.82601928710938, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -2.9767775535583496, "rewards_train/1-l": -2.002890110015869, "rewards_train/1-w": 2.7274696826934814, "rewards_train/2-2": 2.5769202709198, "rewards_train/2-w": -1.526742935180664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.730359792709351, "rewards_train/margins_1": 5.704247236251831, "rewards_train/margins_2": 4.103663206100464, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -279.11553955078125, "logps_train/policy_1_l": -189.54864501953125, "logps_train/policy_1_w": -150.26718139648438, "logps_train/policy_2_2": -198.6277313232422, "logps_train/policy_2_w": -224.2661590576172, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -231.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.8412423133850098, "rewards_train/1-l": -1.2016656398773193, "rewards_train/1-w": 3.482290744781494, "rewards_train/2-2": 3.2026565074920654, "rewards_train/2-w": -0.9852092862129211, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.6839563846588135, "rewards_train/margins_1": 5.323533058166504, "rewards_train/margins_2": 4.187865793704987, "step": 214 }, { "epoch": 0.64, "logps_train/policy_1_2": -228.0076904296875, "logps_train/policy_1_l": -156.124755859375, "logps_train/policy_1_w": -106.23823547363281, "logps_train/policy_2_2": -171.78579711914062, "logps_train/policy_2_w": -157.58291625976562, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.2460795640945435, "rewards_train/1-l": -1.5964603424072266, "rewards_train/1-w": 2.3109419345855713, "rewards_train/2-2": 2.088606595993042, "rewards_train/2-w": -0.47860437631607056, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.907402276992798, "rewards_train/margins_1": 3.5570214986801147, "rewards_train/margins_2": 2.5672109723091125, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -214.34564208984375, "logps_train/policy_1_l": -196.5283203125, "logps_train/policy_1_w": -133.88450622558594, "logps_train/policy_2_2": -153.08836364746094, "logps_train/policy_2_w": -202.24697875976562, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -1.5814372301101685, "rewards_train/1-l": -2.3090810775756836, "rewards_train/1-w": 1.7107677459716797, "rewards_train/2-2": 2.3481950759887695, "rewards_train/2-w": -2.0832908153533936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.019848823547363, "rewards_train/margins_1": 3.292204976081848, "rewards_train/margins_2": 4.431485891342163, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -245.29995727539062, "logps_train/policy_1_l": -197.63662719726562, "logps_train/policy_1_w": -145.2032012939453, "logps_train/policy_2_2": -165.97579956054688, "logps_train/policy_2_w": -223.4557647705078, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.7128076553344727, "rewards_train/1-l": -1.8680082559585571, "rewards_train/1-w": 3.3027267456054688, "rewards_train/2-2": 3.109060049057007, "rewards_train/2-w": -0.9018267393112183, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.170735001564026, "rewards_train/margins_1": 5.015534400939941, "rewards_train/margins_2": 4.010886788368225, "step": 215 }, { "epoch": 0.64, "logps_train/policy_1_2": -146.22911071777344, "logps_train/policy_1_l": -186.11703491210938, "logps_train/policy_1_w": -116.27285766601562, "logps_train/policy_2_2": -105.81583404541016, "logps_train/policy_2_w": -162.51527404785156, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.42740312218666077, "rewards_train/1-l": -1.8977882862091064, "rewards_train/1-w": 2.267782211303711, "rewards_train/2-2": 2.041170597076416, "rewards_train/2-w": -0.30382239818573, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.165570497512817, "rewards_train/margins_1": 2.6951853334903717, "rewards_train/margins_2": 2.344992995262146, "step": 215 }, { "epoch": 0.65, "learning_rate": 4.05596442217179e-06, "loss": 0.9322, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -218.73233032226562, "logps_train/policy_1_l": -181.6964111328125, "logps_train/policy_1_w": -105.10797119140625, "logps_train/policy_2_2": -160.29440307617188, "logps_train/policy_2_w": -154.47946166992188, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.0287007093429565, "rewards_train/1-l": -1.793811559677124, "rewards_train/1-w": 1.7663753032684326, "rewards_train/2-2": 2.5906758308410645, "rewards_train/2-w": -1.0713717937469482, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5601868629455566, "rewards_train/margins_1": 2.795076012611389, "rewards_train/margins_2": 3.6620476245880127, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -204.08367919921875, "logps_train/policy_1_l": -148.99874877929688, "logps_train/policy_1_w": -119.52816772460938, "logps_train/policy_2_2": -135.9224395751953, "logps_train/policy_2_w": -178.3907470703125, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.4357117414474487, "rewards_train/1-l": -1.2196013927459717, "rewards_train/1-w": 1.9827299118041992, "rewards_train/2-2": 2.4306068420410156, "rewards_train/2-w": -1.2656378746032715, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.202331304550171, "rewards_train/margins_1": 3.418441653251648, "rewards_train/margins_2": 3.696244716644287, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -221.74765014648438, "logps_train/policy_1_l": -222.33554077148438, "logps_train/policy_1_w": -157.07115173339844, "logps_train/policy_2_2": -144.72767639160156, "logps_train/policy_2_w": -243.65823364257812, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -1.810311198234558, "rewards_train/1-l": -1.9903910160064697, "rewards_train/1-w": 2.7388076782226562, "rewards_train/2-2": 2.831138849258423, "rewards_train/2-w": -2.220242738723755, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.729198694229126, "rewards_train/margins_1": 4.549118876457214, "rewards_train/margins_2": 5.051381587982178, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -199.8594970703125, "logps_train/policy_1_l": -196.93585205078125, "logps_train/policy_1_w": -142.7415313720703, "logps_train/policy_2_2": -118.11527252197266, "logps_train/policy_2_w": -214.92752075195312, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -2.001575469970703, "rewards_train/1-l": -1.6724114418029785, "rewards_train/1-w": 2.682878017425537, "rewards_train/2-2": 2.630465030670166, "rewards_train/2-w": -1.2907991409301758, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.355289459228516, "rewards_train/margins_1": 4.68445348739624, "rewards_train/margins_2": 3.921264171600342, "step": 216 }, { "epoch": 0.65, "logps_train/policy_1_2": -153.28900146484375, "logps_train/policy_1_l": -115.45445251464844, "logps_train/policy_1_w": -121.71475982666016, "logps_train/policy_2_2": -106.42312622070312, "logps_train/policy_2_w": -169.1344757080078, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.3523377776145935, "rewards_train/1-l": -0.9974473118782043, "rewards_train/1-w": 2.3980557918548584, "rewards_train/2-2": 2.3270225524902344, "rewards_train/2-w": -0.5282918214797974, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3955031037330627, "rewards_train/margins_1": 2.750393569469452, "rewards_train/margins_2": 2.8553143739700317, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -231.8994140625, "logps_train/policy_1_l": -169.30517578125, "logps_train/policy_1_w": -147.58753967285156, "logps_train/policy_2_2": -159.18898010253906, "logps_train/policy_2_w": -206.83334350585938, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -2.0797371864318848, "rewards_train/1-l": -1.7803215980529785, "rewards_train/1-w": 2.9795756340026855, "rewards_train/2-2": 2.6920394897460938, "rewards_train/2-w": -0.7304055094718933, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.759897232055664, "rewards_train/margins_1": 5.05931282043457, "rewards_train/margins_2": 3.422444999217987, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -244.39303588867188, "logps_train/policy_1_l": -176.18417358398438, "logps_train/policy_1_w": -150.43124389648438, "logps_train/policy_2_2": -169.3089599609375, "logps_train/policy_2_w": -215.51593017578125, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.2795372009277344, "rewards_train/1-l": -1.341927170753479, "rewards_train/1-w": 2.542520046234131, "rewards_train/2-2": 3.4764280319213867, "rewards_train/2-w": -1.0990538597106934, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.88444721698761, "rewards_train/margins_1": 3.8220572471618652, "rewards_train/margins_2": 4.57548189163208, "step": 217 }, { "epoch": 0.65, "logps_train/policy_1_2": -199.9424285888672, "logps_train/policy_1_l": -209.77963256835938, "logps_train/policy_1_w": -148.12513732910156, "logps_train/policy_2_2": -140.27838134765625, "logps_train/policy_2_w": -206.30477905273438, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.0340873003005981, "rewards_train/1-l": -2.5893406867980957, "rewards_train/1-w": 2.4870963096618652, "rewards_train/2-2": 2.497943162918091, "rewards_train/2-w": -1.0218852758407593, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.076436996459961, "rewards_train/margins_1": 3.5211836099624634, "rewards_train/margins_2": 3.51982843875885, "step": 217 }, { "epoch": 0.65, "learning_rate": 4.036557376004694e-06, "loss": 0.8806, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -130.95169067382812, "logps_train/policy_1_l": -88.53451538085938, "logps_train/policy_1_w": -93.61212158203125, "logps_train/policy_2_2": -92.79667663574219, "logps_train/policy_2_w": -139.31300354003906, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -0.4578635096549988, "rewards_train/1-l": -0.5426116585731506, "rewards_train/1-w": 1.680267333984375, "rewards_train/2-2": 1.617598056793213, "rewards_train/2-w": -0.688136100769043, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.2228789925575256, "rewards_train/margins_1": 2.138130843639374, "rewards_train/margins_2": 2.305734157562256, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -163.916748046875, "logps_train/policy_1_l": -113.66697692871094, "logps_train/policy_1_w": -82.90190124511719, "logps_train/policy_2_2": -114.41014099121094, "logps_train/policy_2_w": -126.87254333496094, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": -0.9346441626548767, "rewards_train/1-l": -1.0103497505187988, "rewards_train/1-w": 1.247603178024292, "rewards_train/2-2": 1.8006856441497803, "rewards_train/2-w": -1.1154762506484985, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.257952928543091, "rewards_train/margins_1": 2.1822473406791687, "rewards_train/margins_2": 2.916161894798279, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -189.86825561523438, "logps_train/policy_1_l": -136.82992553710938, "logps_train/policy_1_w": -103.80298614501953, "logps_train/policy_2_2": -136.21044921875, "logps_train/policy_2_w": -157.76390075683594, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.9869236946105957, "rewards_train/1-l": -1.0481292009353638, "rewards_train/1-w": 2.000560998916626, "rewards_train/2-2": 2.0777828693389893, "rewards_train/2-w": -1.214085340499878, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0486901998519897, "rewards_train/margins_1": 2.9874846935272217, "rewards_train/margins_2": 3.291868209838867, "step": 218 }, { "epoch": 0.65, "logps_train/policy_1_2": -300.2757873535156, "logps_train/policy_1_l": -242.82337951660156, "logps_train/policy_1_w": -158.9722442626953, "logps_train/policy_2_2": -194.6581268310547, "logps_train/policy_2_w": -240.8065948486328, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -3.5103917121887207, "rewards_train/1-l": -2.031165838241577, "rewards_train/1-w": 2.7441816329956055, "rewards_train/2-2": 3.2716870307922363, "rewards_train/2-w": -2.1837852001190186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.775347471237183, "rewards_train/margins_1": 6.254573345184326, "rewards_train/margins_2": 5.455472230911255, "step": 218 }, { "epoch": 0.66, "logps_train/policy_1_2": -170.77468872070312, "logps_train/policy_1_l": -191.8406982421875, "logps_train/policy_1_w": -160.32460021972656, "logps_train/policy_2_2": -126.05219268798828, "logps_train/policy_2_w": -230.67379760742188, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.542311429977417, "rewards_train/1-l": -1.762292504310608, "rewards_train/1-w": 3.2482051849365234, "rewards_train/2-2": 2.0295472145080566, "rewards_train/2-w": -0.9226536154747009, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.010497689247131, "rewards_train/margins_1": 3.7905166149139404, "rewards_train/margins_2": 2.9522008299827576, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -166.88475036621094, "logps_train/policy_1_l": -160.4972381591797, "logps_train/policy_1_w": -128.94244384765625, "logps_train/policy_2_2": -115.60040283203125, "logps_train/policy_2_w": -186.30203247070312, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.251365303993225, "rewards_train/1-l": -1.476969838142395, "rewards_train/1-w": 2.020306348800659, "rewards_train/2-2": 1.999725341796875, "rewards_train/2-w": -1.4560825824737549, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.497276186943054, "rewards_train/margins_1": 3.2716716527938843, "rewards_train/margins_2": 3.45580792427063, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -229.47967529296875, "logps_train/policy_1_l": -197.2131805419922, "logps_train/policy_1_w": -165.23704528808594, "logps_train/policy_2_2": -154.2677764892578, "logps_train/policy_2_w": -239.00338745117188, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -1.7077338695526123, "rewards_train/1-l": -2.0334277153015137, "rewards_train/1-w": 2.458716869354248, "rewards_train/2-2": 2.955254077911377, "rewards_train/2-w": -1.7296347618103027, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.492144584655762, "rewards_train/margins_1": 4.16645073890686, "rewards_train/margins_2": 4.68488883972168, "step": 219 }, { "epoch": 0.66, "logps_train/policy_1_2": -165.64129638671875, "logps_train/policy_1_l": -112.26480865478516, "logps_train/policy_1_w": -73.98985290527344, "logps_train/policy_2_2": -104.52693939208984, "logps_train/policy_2_w": -116.18143463134766, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -84.5, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": -1.9793524742126465, "rewards_train/1-l": -1.127835988998413, "rewards_train/1-w": 1.0412498712539673, "rewards_train/2-2": 1.7605879306793213, "rewards_train/2-w": -1.2094520330429077, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.1690858602523804, "rewards_train/margins_1": 3.0206023454666138, "rewards_train/margins_2": 2.970039963722229, "step": 219 }, { "epoch": 0.66, "learning_rate": 4.0170003645464835e-06, "loss": 1.056, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -157.84237670898438, "logps_train/policy_1_l": -156.5251922607422, "logps_train/policy_1_w": -116.16783142089844, "logps_train/policy_2_2": -101.91182708740234, "logps_train/policy_2_w": -172.34608459472656, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.9340428113937378, "rewards_train/1-l": -1.501835823059082, "rewards_train/1-w": 1.7617329359054565, "rewards_train/2-2": 1.5269815921783447, "rewards_train/2-w": -1.5996477603912354, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.2635687589645386, "rewards_train/margins_1": 3.6957757472991943, "rewards_train/margins_2": 3.12662935256958, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -209.5069122314453, "logps_train/policy_1_l": -194.0047607421875, "logps_train/policy_1_w": -138.17236328125, "logps_train/policy_2_2": -147.0562744140625, "logps_train/policy_2_w": -201.99114990234375, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.4975674152374268, "rewards_train/1-l": -2.201598882675171, "rewards_train/1-w": 2.735107421875, "rewards_train/2-2": 2.3764030933380127, "rewards_train/2-w": -1.3502874374389648, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.936706304550171, "rewards_train/margins_1": 4.232674837112427, "rewards_train/margins_2": 3.7266905307769775, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -268.200439453125, "logps_train/policy_1_l": -177.10748291015625, "logps_train/policy_1_w": -157.42250061035156, "logps_train/policy_2_2": -183.832275390625, "logps_train/policy_2_w": -240.9846954345703, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -1.5751218795776367, "rewards_train/1-l": -1.7387757301330566, "rewards_train/1-w": 2.561069965362549, "rewards_train/2-2": 3.5577876567840576, "rewards_train/2-w": -2.3574538230895996, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.2998456954956055, "rewards_train/margins_1": 4.1361918449401855, "rewards_train/margins_2": 5.915241479873657, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -214.24496459960938, "logps_train/policy_1_l": -162.36337280273438, "logps_train/policy_1_w": -119.289794921875, "logps_train/policy_2_2": -153.94058227539062, "logps_train/policy_2_w": -170.71524047851562, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.304182529449463, "rewards_train/1-l": -1.6654378175735474, "rewards_train/1-w": 2.1915283203125, "rewards_train/2-2": 2.269223213195801, "rewards_train/2-w": -0.840663492679596, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8569661378860474, "rewards_train/margins_1": 3.495710849761963, "rewards_train/margins_2": 3.1098867058753967, "step": 220 }, { "epoch": 0.66, "logps_train/policy_1_2": -184.6929931640625, "logps_train/policy_1_l": -156.3984375, "logps_train/policy_1_w": -116.4588851928711, "logps_train/policy_2_2": -126.77542114257812, "logps_train/policy_2_w": -185.84060668945312, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.1591421365737915, "rewards_train/1-l": -1.4910163879394531, "rewards_train/1-w": 1.7720807790756226, "rewards_train/2-2": 2.302926540374756, "rewards_train/2-w": -2.0731236934661865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.2630971670150757, "rewards_train/margins_1": 2.931222915649414, "rewards_train/margins_2": 4.376050233840942, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -205.53306579589844, "logps_train/policy_1_l": -241.4017791748047, "logps_train/policy_1_w": -178.00392150878906, "logps_train/policy_2_2": -156.11219787597656, "logps_train/policy_2_w": -231.45816040039062, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -213.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.9807723760604858, "rewards_train/1-l": -2.8704514503479004, "rewards_train/1-w": 2.2934556007385254, "rewards_train/2-2": 2.2038204669952393, "rewards_train/2-w": -0.8872232437133789, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.163907051086426, "rewards_train/margins_1": 3.2742279767990112, "rewards_train/margins_2": 3.091043710708618, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -182.43603515625, "logps_train/policy_1_l": -118.27851104736328, "logps_train/policy_1_w": -109.9808349609375, "logps_train/policy_2_2": -121.79978942871094, "logps_train/policy_2_w": -175.1463623046875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.6918456554412842, "rewards_train/1-l": -1.1221877336502075, "rewards_train/1-w": 2.3341431617736816, "rewards_train/2-2": 2.2087416648864746, "rewards_train/2-w": -1.4273303747177124, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.456330895423889, "rewards_train/margins_1": 4.025988817214966, "rewards_train/margins_2": 3.636072039604187, "step": 221 }, { "epoch": 0.66, "logps_train/policy_1_2": -150.63485717773438, "logps_train/policy_1_l": -102.44718170166016, "logps_train/policy_1_w": -126.7484359741211, "logps_train/policy_2_2": -103.47784423828125, "logps_train/policy_2_w": -180.45936584472656, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.5589934587478638, "rewards_train/1-l": -0.9281167387962341, "rewards_train/1-w": 2.324375629425049, "rewards_train/2-2": 2.2664732933044434, "rewards_train/2-w": -1.4348030090332031, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.252492368221283, "rewards_train/margins_1": 2.8833690881729126, "rewards_train/margins_2": 3.7012763023376465, "step": 221 }, { "epoch": 0.66, "learning_rate": 3.997295296527001e-06, "loss": 0.9231, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -233.31021118164062, "logps_train/policy_1_l": -189.92959594726562, "logps_train/policy_1_w": -123.95811462402344, "logps_train/policy_2_2": -152.3089599609375, "logps_train/policy_2_w": -200.98703002929688, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.8591468334197998, "rewards_train/1-l": -1.7568273544311523, "rewards_train/1-w": 2.510828971862793, "rewards_train/2-2": 2.754260540008545, "rewards_train/2-w": -2.1408891677856445, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.267656326293945, "rewards_train/margins_1": 4.369975805282593, "rewards_train/margins_2": 4.8951497077941895, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -197.330810546875, "logps_train/policy_1_l": -211.23388671875, "logps_train/policy_1_w": -121.28814697265625, "logps_train/policy_2_2": -128.46876525878906, "logps_train/policy_2_w": -199.810791015625, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.6655031442642212, "rewards_train/1-l": -2.489013195037842, "rewards_train/1-w": 3.0051698684692383, "rewards_train/2-2": 2.3859357833862305, "rewards_train/2-w": -1.3318605422973633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.49418306350708, "rewards_train/margins_1": 4.6706730127334595, "rewards_train/margins_2": 3.7177963256835938, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -208.22360229492188, "logps_train/policy_1_l": -194.6532745361328, "logps_train/policy_1_w": -161.0077362060547, "logps_train/policy_2_2": -143.4341583251953, "logps_train/policy_2_w": -228.380126953125, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -1.9762670993804932, "rewards_train/1-l": -2.2577099800109863, "rewards_train/1-w": 3.016413688659668, "rewards_train/2-2": 1.977677583694458, "rewards_train/2-w": -0.9294199347496033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.274123668670654, "rewards_train/margins_1": 4.992680788040161, "rewards_train/margins_2": 2.9070975184440613, "step": 222 }, { "epoch": 0.66, "logps_train/policy_1_2": -178.77008056640625, "logps_train/policy_1_l": -173.03164672851562, "logps_train/policy_1_w": -120.30541229248047, "logps_train/policy_2_2": -107.90447998046875, "logps_train/policy_2_w": -197.283935546875, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -2.1857972145080566, "rewards_train/1-l": -1.9514074325561523, "rewards_train/1-w": 2.2087173461914062, "rewards_train/2-2": 1.9558405876159668, "rewards_train/2-w": -2.3123779296875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.160124778747559, "rewards_train/margins_1": 4.394514560699463, "rewards_train/margins_2": 4.268218517303467, "step": 222 }, { "epoch": 0.67, "logps_train/policy_1_2": -174.18763732910156, "logps_train/policy_1_l": -122.16452026367188, "logps_train/policy_1_w": -119.04232788085938, "logps_train/policy_2_2": -107.30557250976562, "logps_train/policy_2_w": -185.52615356445312, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.5521624088287354, "rewards_train/1-l": -1.4251439571380615, "rewards_train/1-w": 2.9596354961395264, "rewards_train/2-2": 2.2516698837280273, "rewards_train/2-w": -0.9518349170684814, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.384779453277588, "rewards_train/margins_1": 4.511797904968262, "rewards_train/margins_2": 3.203504800796509, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -187.42044067382812, "logps_train/policy_1_l": -161.32119750976562, "logps_train/policy_1_w": -119.73847198486328, "logps_train/policy_2_2": -123.6441650390625, "logps_train/policy_2_w": -168.57778930664062, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.414308786392212, "rewards_train/1-l": -1.7676665782928467, "rewards_train/1-w": 2.064629316329956, "rewards_train/2-2": 2.44905948638916, "rewards_train/2-w": -0.8476232290267944, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8322958946228027, "rewards_train/margins_1": 3.478938102722168, "rewards_train/margins_2": 3.2966827154159546, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -156.54766845703125, "logps_train/policy_1_l": -159.78265380859375, "logps_train/policy_1_w": -116.09996032714844, "logps_train/policy_2_2": -103.32334899902344, "logps_train/policy_2_w": -166.00067138671875, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.01585054397583, "rewards_train/1-l": -1.9022270441055298, "rewards_train/1-w": 2.2153940200805664, "rewards_train/2-2": 2.0578255653381348, "rewards_train/2-w": -0.6578802466392517, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.117621064186096, "rewards_train/margins_1": 3.2312445640563965, "rewards_train/margins_2": 2.7157058119773865, "step": 223 }, { "epoch": 0.67, "logps_train/policy_1_2": -203.267578125, "logps_train/policy_1_l": -163.15347290039062, "logps_train/policy_1_w": -139.69969177246094, "logps_train/policy_2_2": -141.8045654296875, "logps_train/policy_2_w": -206.1190185546875, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.0095700025558472, "rewards_train/1-l": -1.5609519481658936, "rewards_train/1-w": 2.5003433227539062, "rewards_train/2-2": 2.7582144737243652, "rewards_train/2-w": -1.591785192489624, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.0612952709198, "rewards_train/margins_1": 3.5099133253097534, "rewards_train/margins_2": 4.349999666213989, "step": 223 }, { "epoch": 0.67, "learning_rate": 3.977444095126146e-06, "loss": 0.8804, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -148.7723388671875, "logps_train/policy_1_l": -143.81837463378906, "logps_train/policy_1_w": -95.66327667236328, "logps_train/policy_2_2": -96.7061538696289, "logps_train/policy_2_w": -146.303466796875, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -0.9339470863342285, "rewards_train/1-l": -1.8376967906951904, "rewards_train/1-w": 2.066997766494751, "rewards_train/2-2": 2.1644673347473145, "rewards_train/2-w": -1.0327634811401367, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9046945571899414, "rewards_train/margins_1": 3.0009448528289795, "rewards_train/margins_2": 3.197230815887451, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -149.89952087402344, "logps_train/policy_1_l": -120.44723510742188, "logps_train/policy_1_w": -80.39035034179688, "logps_train/policy_2_2": -102.98214721679688, "logps_train/policy_2_w": -131.42596435546875, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -96.5, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": -0.7962021827697754, "rewards_train/1-l": -1.065866470336914, "rewards_train/1-w": 1.6145786046981812, "rewards_train/2-2": 2.163503885269165, "rewards_train/2-w": -1.0138847827911377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.680445075035095, "rewards_train/margins_1": 2.4107807874679565, "rewards_train/margins_2": 3.1773886680603027, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -174.88494873046875, "logps_train/policy_1_l": -130.4998779296875, "logps_train/policy_1_w": -101.612060546875, "logps_train/policy_2_2": -122.31318664550781, "logps_train/policy_2_w": -149.07858276367188, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -0.8904477953910828, "rewards_train/1-l": -1.4505741596221924, "rewards_train/1-w": 1.7534420490264893, "rewards_train/2-2": 2.242314338684082, "rewards_train/2-w": -1.0961394309997559, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2040162086486816, "rewards_train/margins_1": 2.643889844417572, "rewards_train/margins_2": 3.338453769683838, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -177.86297607421875, "logps_train/policy_1_l": -176.31243896484375, "logps_train/policy_1_w": -136.64996337890625, "logps_train/policy_2_2": -121.75247192382812, "logps_train/policy_2_w": -208.73779296875, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.8647165894508362, "rewards_train/1-l": -1.8871026039123535, "rewards_train/1-w": 2.6842212677001953, "rewards_train/2-2": 2.3247532844543457, "rewards_train/2-w": -1.5128428936004639, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.571323871612549, "rewards_train/margins_1": 3.5489378571510315, "rewards_train/margins_2": 3.8375961780548096, "step": 224 }, { "epoch": 0.67, "logps_train/policy_1_2": -120.76922607421875, "logps_train/policy_1_l": -117.90348815917969, "logps_train/policy_1_w": -89.48893737792969, "logps_train/policy_2_2": -82.06245422363281, "logps_train/policy_2_w": -148.18109130859375, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -0.9573911428451538, "rewards_train/1-l": -1.4449385404586792, "rewards_train/1-w": 1.9065759181976318, "rewards_train/2-2": 1.2828173637390137, "rewards_train/2-w": -1.68959379196167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.351514458656311, "rewards_train/margins_1": 2.8639670610427856, "rewards_train/margins_2": 2.9724111557006836, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -220.34376525878906, "logps_train/policy_1_l": -168.443115234375, "logps_train/policy_1_w": -101.45368194580078, "logps_train/policy_2_2": -151.47740173339844, "logps_train/policy_2_w": -163.623046875, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -2.1601576805114746, "rewards_train/1-l": -1.6114025115966797, "rewards_train/1-w": 2.063908576965332, "rewards_train/2-2": 2.1210098266601562, "rewards_train/2-w": -1.623436689376831, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6753110885620117, "rewards_train/margins_1": 4.224066257476807, "rewards_train/margins_2": 3.7444465160369873, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -237.3521270751953, "logps_train/policy_1_l": -200.47976684570312, "logps_train/policy_1_w": -157.22943115234375, "logps_train/policy_2_2": -171.19589233398438, "logps_train/policy_2_w": -213.88783264160156, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.3122634887695312, "rewards_train/1-l": -1.9003630876541138, "rewards_train/1-w": 2.4586496353149414, "rewards_train/2-2": 2.8931055068969727, "rewards_train/2-w": -0.7840946912765503, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.359012722969055, "rewards_train/margins_1": 3.7709131240844727, "rewards_train/margins_2": 3.677200198173523, "step": 225 }, { "epoch": 0.67, "logps_train/policy_1_2": -242.67819213867188, "logps_train/policy_1_l": -143.450439453125, "logps_train/policy_1_w": -105.42976379394531, "logps_train/policy_2_2": -158.29327392578125, "logps_train/policy_2_w": -161.92840576171875, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -1.8061020374298096, "rewards_train/1-l": -1.3070547580718994, "rewards_train/1-w": 2.2371997833251953, "rewards_train/2-2": 3.5050477981567383, "rewards_train/2-w": -1.1338560581207275, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5442545413970947, "rewards_train/margins_1": 4.043301820755005, "rewards_train/margins_2": 4.638903856277466, "step": 225 }, { "epoch": 0.68, "learning_rate": 3.957448697786181e-06, "loss": 0.9959, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -168.5762939453125, "logps_train/policy_1_l": -142.99359130859375, "logps_train/policy_1_w": -120.55171966552734, "logps_train/policy_2_2": -117.67825317382812, "logps_train/policy_2_w": -168.10842895507812, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.4134886264801025, "rewards_train/1-l": -0.9802179336547852, "rewards_train/1-w": 1.8647499084472656, "rewards_train/2-2": 1.61635422706604, "rewards_train/2-w": -0.8616251349449158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.844967842102051, "rewards_train/margins_1": 3.278238534927368, "rewards_train/margins_2": 2.477979362010956, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -138.67333984375, "logps_train/policy_1_l": -83.97146606445312, "logps_train/policy_1_w": -97.33439636230469, "logps_train/policy_2_2": -92.02188110351562, "logps_train/policy_2_w": -149.83267211914062, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -0.9876470565795898, "rewards_train/1-l": -0.7428499460220337, "rewards_train/1-w": 1.5956621170043945, "rewards_train/2-2": 2.1325769424438477, "rewards_train/2-w": -1.829850196838379, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.3385120630264282, "rewards_train/margins_1": 2.5833091735839844, "rewards_train/margins_2": 3.9624271392822266, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -201.51834106445312, "logps_train/policy_1_l": -151.47312927246094, "logps_train/policy_1_w": -102.86590576171875, "logps_train/policy_2_2": -140.345947265625, "logps_train/policy_2_w": -161.44473266601562, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.409257173538208, "rewards_train/1-l": -1.3680155277252197, "rewards_train/1-w": 2.0731754302978516, "rewards_train/2-2": 2.524291753768921, "rewards_train/2-w": -1.4952545166015625, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4411909580230713, "rewards_train/margins_1": 3.4824326038360596, "rewards_train/margins_2": 4.019546270370483, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -172.11715698242188, "logps_train/policy_1_l": -174.2812042236328, "logps_train/policy_1_w": -115.26338195800781, "logps_train/policy_2_2": -126.51127624511719, "logps_train/policy_2_w": -168.87693786621094, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.4152313470840454, "rewards_train/1-l": -1.9081984758377075, "rewards_train/1-w": 2.5158495903015137, "rewards_train/2-2": 2.3980908393859863, "rewards_train/2-w": -1.0212876796722412, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.424048066139221, "rewards_train/margins_1": 2.931080937385559, "rewards_train/margins_2": 3.4193785190582275, "step": 226 }, { "epoch": 0.68, "logps_train/policy_1_2": -155.04275512695312, "logps_train/policy_1_l": -131.9775390625, "logps_train/policy_1_w": -104.23281860351562, "logps_train/policy_2_2": -122.87895202636719, "logps_train/policy_2_w": -131.9620819091797, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -0.011307179927825928, "rewards_train/1-l": -1.3963868618011475, "rewards_train/1-w": 1.9527921676635742, "rewards_train/2-2": 2.168354034423828, "rewards_train/2-w": 0.31629207730293274, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3491790294647217, "rewards_train/margins_1": 1.9640993475914001, "rewards_train/margins_2": 1.8520619571208954, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -88.44007873535156, "logps_train/policy_1_l": -97.97760009765625, "logps_train/policy_1_w": -61.31478500366211, "logps_train/policy_2_2": -73.34425354003906, "logps_train/policy_2_w": -77.8399658203125, "logps_train/ref_1_2": -90.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -73.5, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -80.5, "rewards_train/1-2": 0.19056235253810883, "rewards_train/1-l": -1.576080083847046, "rewards_train/1-w": 1.2181308269500732, "rewards_train/2-2": 0.9821761846542358, "rewards_train/2-w": 0.2472536712884903, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.794210910797119, "rewards_train/margins_1": 1.0275684744119644, "rewards_train/margins_2": 0.7349225133657455, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -205.22964477539062, "logps_train/policy_1_l": -184.31573486328125, "logps_train/policy_1_w": -129.5274658203125, "logps_train/policy_2_2": -127.69599914550781, "logps_train/policy_2_w": -196.9603271484375, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -2.623745918273926, "rewards_train/1-l": -1.5470032691955566, "rewards_train/1-w": 2.332995891571045, "rewards_train/2-2": 1.8639940023422241, "rewards_train/2-w": -1.685096025466919, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8799991607666016, "rewards_train/margins_1": 4.956741809844971, "rewards_train/margins_2": 3.549090027809143, "step": 227 }, { "epoch": 0.68, "logps_train/policy_1_2": -199.09432983398438, "logps_train/policy_1_l": -161.34219360351562, "logps_train/policy_1_w": -124.29280090332031, "logps_train/policy_2_2": -142.4645538330078, "logps_train/policy_2_w": -190.9990234375, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.4154865741729736, "rewards_train/1-l": -1.5152747631072998, "rewards_train/1-w": 2.446500301361084, "rewards_train/2-2": 2.330204486846924, "rewards_train/2-w": -1.8178709745407104, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.961775064468384, "rewards_train/margins_1": 3.8619868755340576, "rewards_train/margins_2": 4.148075461387634, "step": 227 }, { "epoch": 0.68, "learning_rate": 3.937311056022634e-06, "loss": 1.1965, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -185.10092163085938, "logps_train/policy_1_l": -143.68687438964844, "logps_train/policy_1_w": -133.322265625, "logps_train/policy_2_2": -132.05982971191406, "logps_train/policy_2_w": -198.63304138183594, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -1.0051591396331787, "rewards_train/1-l": -1.4403177499771118, "rewards_train/1-w": 2.413086414337158, "rewards_train/2-2": 2.2022690773010254, "rewards_train/2-w": -1.3773671388626099, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.85340416431427, "rewards_train/margins_1": 3.418245553970337, "rewards_train/margins_2": 3.5796362161636353, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -236.15805053710938, "logps_train/policy_1_l": -206.47891235351562, "logps_train/policy_1_w": -177.51507568359375, "logps_train/policy_2_2": -166.01817321777344, "logps_train/policy_2_w": -251.13754272460938, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -237.0, "rewards_train/1-2": -1.522446632385254, "rewards_train/1-l": -2.268984794616699, "rewards_train/1-w": 2.9227113723754883, "rewards_train/2-2": 2.839198350906372, "rewards_train/2-w": -1.4137547016143799, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.1916961669921875, "rewards_train/margins_1": 4.445158004760742, "rewards_train/margins_2": 4.252953052520752, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -178.74253845214844, "logps_train/policy_1_l": -154.42417907714844, "logps_train/policy_1_w": -122.98291015625, "logps_train/policy_2_2": -124.01924133300781, "logps_train/policy_2_w": -192.99365234375, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.9660506844520569, "rewards_train/1-l": -1.9150748252868652, "rewards_train/1-w": 2.296337366104126, "rewards_train/2-2": 2.3885059356689453, "rewards_train/2-w": -2.1050305366516113, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.211412191390991, "rewards_train/margins_1": 3.262388050556183, "rewards_train/margins_2": 4.493536472320557, "step": 228 }, { "epoch": 0.68, "logps_train/policy_1_2": -222.18930053710938, "logps_train/policy_1_l": -183.93505859375, "logps_train/policy_1_w": -129.1396484375, "logps_train/policy_2_2": -166.44140625, "logps_train/policy_2_w": -179.60592651367188, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.0759613513946533, "rewards_train/1-l": -1.762645959854126, "rewards_train/1-w": 1.959472417831421, "rewards_train/2-2": 2.3949217796325684, "rewards_train/2-w": -0.9105934500694275, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.722118377685547, "rewards_train/margins_1": 3.035433769226074, "rewards_train/margins_2": 3.305515229701996, "step": 228 }, { "epoch": 0.69, "logps_train/policy_1_2": -177.15777587890625, "logps_train/policy_1_l": -152.7384033203125, "logps_train/policy_1_w": -109.38992309570312, "logps_train/policy_2_2": -123.40939331054688, "logps_train/policy_2_w": -168.3597412109375, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.9440985918045044, "rewards_train/1-l": -1.8794578313827515, "rewards_train/1-w": 2.269212007522583, "rewards_train/2-2": 1.6596465110778809, "rewards_train/2-w": -1.1574594974517822, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1486698389053345, "rewards_train/margins_1": 4.213310599327087, "rewards_train/margins_2": 2.817106008529663, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -224.82626342773438, "logps_train/policy_1_l": -257.97283935546875, "logps_train/policy_1_w": -139.269287109375, "logps_train/policy_2_2": -151.46726989746094, "logps_train/policy_2_w": -206.94601440429688, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -231.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -1.549617052078247, "rewards_train/1-l": -2.706756114959717, "rewards_train/1-w": 2.2586188316345215, "rewards_train/2-2": 2.9390153884887695, "rewards_train/2-w": -1.8078837394714355, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.965374946594238, "rewards_train/margins_1": 3.8082358837127686, "rewards_train/margins_2": 4.746899127960205, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -160.08035278320312, "logps_train/policy_1_l": -129.17971801757812, "logps_train/policy_1_w": -102.73526000976562, "logps_train/policy_2_2": -116.31565856933594, "logps_train/policy_2_w": -156.73818969726562, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.49123942852020264, "rewards_train/1-l": -1.8460965156555176, "rewards_train/1-w": 1.7784764766693115, "rewards_train/2-2": 2.173121452331543, "rewards_train/2-w": -1.4755780696868896, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.624572992324829, "rewards_train/margins_1": 2.269715905189514, "rewards_train/margins_2": 3.6486995220184326, "step": 229 }, { "epoch": 0.69, "logps_train/policy_1_2": -141.84243774414062, "logps_train/policy_1_l": -142.21861267089844, "logps_train/policy_1_w": -109.02886962890625, "logps_train/policy_2_2": -103.86582946777344, "logps_train/policy_2_w": -153.83184814453125, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.4436179995536804, "rewards_train/1-l": -1.6256704330444336, "rewards_train/1-w": 2.587738037109375, "rewards_train/2-2": 1.8614634275436401, "rewards_train/2-w": 0.03947223722934723, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.213408470153809, "rewards_train/margins_1": 3.0313560366630554, "rewards_train/margins_2": 1.821991190314293, "step": 229 }, { "epoch": 0.69, "learning_rate": 3.917033135233845e-06, "loss": 0.9901, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -198.68557739257812, "logps_train/policy_1_l": -157.0570068359375, "logps_train/policy_1_w": -126.08069610595703, "logps_train/policy_2_2": -134.71145629882812, "logps_train/policy_2_w": -200.41293334960938, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -0.866213858127594, "rewards_train/1-l": -1.3632197380065918, "rewards_train/1-w": 2.0809926986694336, "rewards_train/2-2": 3.0272927284240723, "rewards_train/2-w": -2.056918144226074, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.4442124366760254, "rewards_train/margins_1": 2.9472065567970276, "rewards_train/margins_2": 5.0842108726501465, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -204.41207885742188, "logps_train/policy_1_l": -195.35647583007812, "logps_train/policy_1_w": -148.4629364013672, "logps_train/policy_2_2": -142.03094482421875, "logps_train/policy_2_w": -214.5452117919922, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.746286392211914, "rewards_train/1-l": -2.676077365875244, "rewards_train/1-w": 2.8695273399353027, "rewards_train/2-2": 2.2461252212524414, "rewards_train/2-w": -1.0275681018829346, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.545604705810547, "rewards_train/margins_1": 4.615813732147217, "rewards_train/margins_2": 3.273693323135376, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -261.17333984375, "logps_train/policy_1_l": -236.83961486816406, "logps_train/policy_1_w": -234.7405242919922, "logps_train/policy_2_2": -177.11575317382812, "logps_train/policy_2_w": -321.0611572265625, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -264.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -302.0, "rewards_train/1-2": -1.4548345804214478, "rewards_train/1-l": -1.4223411083221436, "rewards_train/1-w": 2.8759474754333496, "rewards_train/2-2": 3.2462363243103027, "rewards_train/2-w": -1.8518167734146118, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.298288583755493, "rewards_train/margins_1": 4.330782055854797, "rewards_train/margins_2": 5.0980530977249146, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -171.56741333007812, "logps_train/policy_1_l": -183.96958923339844, "logps_train/policy_1_w": -126.43993377685547, "logps_train/policy_2_2": -125.30695343017578, "logps_train/policy_2_w": -185.86865234375, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.6069369316101074, "rewards_train/1-l": -2.328599214553833, "rewards_train/1-w": 2.7614755630493164, "rewards_train/2-2": 2.256120443344116, "rewards_train/2-w": -0.6837407350540161, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.090074777603149, "rewards_train/margins_1": 3.368412494659424, "rewards_train/margins_2": 2.9398611783981323, "step": 230 }, { "epoch": 0.69, "logps_train/policy_1_2": -182.40158081054688, "logps_train/policy_1_l": -155.57666015625, "logps_train/policy_1_w": -111.87556457519531, "logps_train/policy_2_2": -114.50257873535156, "logps_train/policy_2_w": -162.77297973632812, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -2.193868637084961, "rewards_train/1-l": -1.2983872890472412, "rewards_train/1-w": 1.6950604915618896, "rewards_train/2-2": 1.4454206228256226, "rewards_train/2-w": -1.3155796527862549, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.993447780609131, "rewards_train/margins_1": 3.8889291286468506, "rewards_train/margins_2": 2.7610002756118774, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -182.1942138671875, "logps_train/policy_1_l": -124.27369689941406, "logps_train/policy_1_w": -104.64799499511719, "logps_train/policy_2_2": -134.5726318359375, "logps_train/policy_2_w": -143.33909606933594, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -0.8282097578048706, "rewards_train/1-l": -0.9228777885437012, "rewards_train/1-w": 1.769380807876587, "rewards_train/2-2": 2.1603145599365234, "rewards_train/2-w": -0.5089104175567627, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.692258596420288, "rewards_train/margins_1": 2.5975905656814575, "rewards_train/margins_2": 2.669224977493286, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -198.73812866210938, "logps_train/policy_1_l": -159.37420654296875, "logps_train/policy_1_w": -129.2037353515625, "logps_train/policy_2_2": -149.2364501953125, "logps_train/policy_2_w": -175.12265014648438, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.7202971577644348, "rewards_train/1-l": -1.7340517044067383, "rewards_train/1-w": 2.319323778152466, "rewards_train/2-2": 2.2763538360595703, "rewards_train/2-w": -0.4859963655471802, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.053375482559204, "rewards_train/margins_1": 3.0396209359169006, "rewards_train/margins_2": 2.7623502016067505, "step": 231 }, { "epoch": 0.69, "logps_train/policy_1_2": -200.9384307861328, "logps_train/policy_1_l": -163.01425170898438, "logps_train/policy_1_w": -137.40907287597656, "logps_train/policy_2_2": -135.93922424316406, "logps_train/policy_2_w": -210.27239990234375, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.4118120670318604, "rewards_train/1-l": -1.3130948543548584, "rewards_train/1-w": 2.756359100341797, "rewards_train/2-2": 2.444553852081299, "rewards_train/2-w": -1.2545843124389648, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.069453954696655, "rewards_train/margins_1": 4.168171167373657, "rewards_train/margins_2": 3.6991381645202637, "step": 231 }, { "epoch": 0.69, "learning_rate": 3.8966169145091314e-06, "loss": 0.9365, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -210.7498321533203, "logps_train/policy_1_l": -152.87567138671875, "logps_train/policy_1_w": -120.12353515625, "logps_train/policy_2_2": -143.90093994140625, "logps_train/policy_2_w": -168.58792114257812, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.755256175994873, "rewards_train/1-l": -1.2001891136169434, "rewards_train/1-w": 1.6660157442092896, "rewards_train/2-2": 2.2936954498291016, "rewards_train/2-w": -1.2605254650115967, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.866204857826233, "rewards_train/margins_1": 3.4212719202041626, "rewards_train/margins_2": 3.5542209148406982, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -205.72586059570312, "logps_train/policy_1_l": -112.49932861328125, "logps_train/policy_1_w": -112.84478759765625, "logps_train/policy_2_2": -150.80599975585938, "logps_train/policy_2_w": -153.31788635253906, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.9296180009841919, "rewards_train/1-l": -1.10872220993042, "rewards_train/1-w": 1.8139584064483643, "rewards_train/2-2": 2.4897117614746094, "rewards_train/2-w": -0.7978035807609558, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.922680616378784, "rewards_train/margins_1": 2.743576407432556, "rewards_train/margins_2": 3.287515342235565, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -196.84494018554688, "logps_train/policy_1_l": -194.06106567382812, "logps_train/policy_1_w": -129.3641815185547, "logps_train/policy_2_2": -141.42640686035156, "logps_train/policy_2_w": -175.16000366210938, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.3161354064941406, "rewards_train/1-l": -2.345364570617676, "rewards_train/1-w": 2.0202720165252686, "rewards_train/2-2": 2.2693703174591064, "rewards_train/2-w": -0.6398265361785889, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.365636587142944, "rewards_train/margins_1": 3.336407423019409, "rewards_train/margins_2": 2.9091968536376953, "step": 232 }, { "epoch": 0.69, "logps_train/policy_1_2": -171.6537322998047, "logps_train/policy_1_l": -162.4381866455078, "logps_train/policy_1_w": -119.92312622070312, "logps_train/policy_2_2": -123.27005004882812, "logps_train/policy_2_w": -164.56369018554688, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.7095131874084473, "rewards_train/1-l": -1.913741111755371, "rewards_train/1-w": 2.0072972774505615, "rewards_train/2-2": 2.2780730724334717, "rewards_train/2-w": -0.6251198053359985, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.9210383892059326, "rewards_train/margins_1": 2.716810464859009, "rewards_train/margins_2": 2.90319287776947, "step": 232 }, { "epoch": 0.7, "logps_train/policy_1_2": -151.67156982421875, "logps_train/policy_1_l": -138.8526611328125, "logps_train/policy_1_w": -91.40020751953125, "logps_train/policy_2_2": -102.04547119140625, "logps_train/policy_2_w": -141.00946044921875, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -1.2068538665771484, "rewards_train/1-l": -1.8304812908172607, "rewards_train/1-w": 2.1435728073120117, "rewards_train/2-2": 1.968303918838501, "rewards_train/2-w": -0.7079764604568481, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9740540981292725, "rewards_train/margins_1": 3.35042667388916, "rewards_train/margins_2": 2.676280379295349, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -267.5989990234375, "logps_train/policy_1_l": -197.28541564941406, "logps_train/policy_1_w": -122.44694519042969, "logps_train/policy_2_2": -196.265380859375, "logps_train/policy_2_w": -182.77777099609375, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -3.108336925506592, "rewards_train/1-l": -1.7857190370559692, "rewards_train/1-w": 2.0306966304779053, "rewards_train/2-2": 1.2355718612670898, "rewards_train/2-w": -1.1922309398651123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8164156675338745, "rewards_train/margins_1": 5.139033555984497, "rewards_train/margins_2": 2.427802801132202, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -204.91375732421875, "logps_train/policy_1_l": -192.59661865234375, "logps_train/policy_1_w": -168.1948699951172, "logps_train/policy_2_2": -150.53421020507812, "logps_train/policy_2_w": -217.44485473632812, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.4852240085601807, "rewards_train/1-l": -1.8210875988006592, "rewards_train/1-w": 2.1383259296417236, "rewards_train/2-2": 1.8497040271759033, "rewards_train/2-w": -0.9671413898468018, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.959413528442383, "rewards_train/margins_1": 3.6235499382019043, "rewards_train/margins_2": 2.816845417022705, "step": 233 }, { "epoch": 0.7, "logps_train/policy_1_2": -227.303955078125, "logps_train/policy_1_l": -228.62330627441406, "logps_train/policy_1_w": -148.03677368164062, "logps_train/policy_2_2": -160.98651123046875, "logps_train/policy_2_w": -214.99696350097656, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": -1.2413337230682373, "rewards_train/1-l": -2.4523696899414062, "rewards_train/1-w": 2.420151710510254, "rewards_train/2-2": 2.2622861862182617, "rewards_train/2-w": -1.3887592554092407, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.87252140045166, "rewards_train/margins_1": 3.661485433578491, "rewards_train/margins_2": 3.6510454416275024, "step": 233 }, { "epoch": 0.7, "learning_rate": 3.876064386435646e-06, "loss": 1.3234, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -136.7818145751953, "logps_train/policy_1_l": -66.23884582519531, "logps_train/policy_1_w": -69.67826843261719, "logps_train/policy_2_2": -100.60418701171875, "logps_train/policy_2_w": -99.76914978027344, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -57.5, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": -0.6336500644683838, "rewards_train/1-l": -0.8729573488235474, "rewards_train/1-w": 1.4407670497894287, "rewards_train/2-2": 1.5444645881652832, "rewards_train/2-w": -0.21480567753314972, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.313724398612976, "rewards_train/margins_1": 2.0744171142578125, "rewards_train/margins_2": 1.759270265698433, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -155.09701538085938, "logps_train/policy_1_l": -148.92501831054688, "logps_train/policy_1_w": -115.5565185546875, "logps_train/policy_2_2": -111.27298736572266, "logps_train/policy_2_w": -185.8444366455078, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.5739606618881226, "rewards_train/1-l": -1.2016820907592773, "rewards_train/1-w": 2.709972858428955, "rewards_train/2-2": 1.9361776113510132, "rewards_train/2-w": -1.5957715511322021, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9116549491882324, "rewards_train/margins_1": 3.2839335203170776, "rewards_train/margins_2": 3.5319491624832153, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -121.38025665283203, "logps_train/policy_1_l": -180.2803955078125, "logps_train/policy_1_w": -107.48480224609375, "logps_train/policy_2_2": -89.19309997558594, "logps_train/policy_2_w": -152.48995971679688, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -0.438806414604187, "rewards_train/1-l": -1.7487432956695557, "rewards_train/1-w": 2.28120756149292, "rewards_train/2-2": 1.3482682704925537, "rewards_train/2-w": -0.14782491326332092, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.625, "rewards_train/margins": 4.029950857162476, "rewards_train/margins_1": 2.720013976097107, "rewards_train/margins_2": 1.4960931837558746, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -242.95852661132812, "logps_train/policy_1_l": -259.85125732421875, "logps_train/policy_1_w": -173.45068359375, "logps_train/policy_2_2": -184.21746826171875, "logps_train/policy_2_w": -220.77059936523438, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -235.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -0.6901881694793701, "rewards_train/1-l": -2.5327811241149902, "rewards_train/1-w": 2.3092281818389893, "rewards_train/2-2": 2.6089179515838623, "rewards_train/2-w": -0.30479294061660767, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.8420093059539795, "rewards_train/margins_1": 2.9994163513183594, "rewards_train/margins_2": 2.91371089220047, "step": 234 }, { "epoch": 0.7, "logps_train/policy_1_2": -215.57077026367188, "logps_train/policy_1_l": -195.06082153320312, "logps_train/policy_1_w": -161.567138671875, "logps_train/policy_2_2": -148.65579223632812, "logps_train/policy_2_w": -233.98565673828125, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -1.1535601615905762, "rewards_train/1-l": -1.5599884986877441, "rewards_train/1-w": 2.6780505180358887, "rewards_train/2-2": 2.7415974140167236, "rewards_train/2-w": -1.3259094953536987, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.238039016723633, "rewards_train/margins_1": 3.831610679626465, "rewards_train/margins_2": 4.067506909370422, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -223.7032470703125, "logps_train/policy_1_l": -160.72323608398438, "logps_train/policy_1_w": -119.7579345703125, "logps_train/policy_2_2": -141.5437469482422, "logps_train/policy_2_w": -189.24880981445312, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -2.1949350833892822, "rewards_train/1-l": -1.6488862037658691, "rewards_train/1-w": 2.4601433277130127, "rewards_train/2-2": 2.8128132820129395, "rewards_train/2-w": -1.275663137435913, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.109029531478882, "rewards_train/margins_1": 4.655078411102295, "rewards_train/margins_2": 4.0884764194488525, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -220.2164306640625, "logps_train/policy_1_l": -189.31358337402344, "logps_train/policy_1_w": -171.73370361328125, "logps_train/policy_2_2": -167.46218872070312, "logps_train/policy_2_w": -218.27001953125, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -0.584144115447998, "rewards_train/1-l": -1.9028431177139282, "rewards_train/1-w": 2.830536127090454, "rewards_train/2-2": 2.7264366149902344, "rewards_train/2-w": 0.12377941608428955, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.733379244804382, "rewards_train/margins_1": 3.414680242538452, "rewards_train/margins_2": 2.602657198905945, "step": 235 }, { "epoch": 0.7, "logps_train/policy_1_2": -209.77560424804688, "logps_train/policy_1_l": -158.30751037597656, "logps_train/policy_1_w": -138.4820556640625, "logps_train/policy_2_2": -138.9027099609375, "logps_train/policy_2_w": -210.02316284179688, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.8666226863861084, "rewards_train/1-l": -1.148133397102356, "rewards_train/1-w": 3.2039425373077393, "rewards_train/2-2": 2.277308464050293, "rewards_train/2-w": -0.9751678705215454, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.352075934410095, "rewards_train/margins_1": 5.070565223693848, "rewards_train/margins_2": 3.2524763345718384, "step": 235 }, { "epoch": 0.71, "learning_rate": 3.855377556903897e-06, "loss": 1.0448, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -191.36883544921875, "logps_train/policy_1_l": -186.91864013671875, "logps_train/policy_1_w": -126.78495025634766, "logps_train/policy_2_2": -137.3134765625, "logps_train/policy_2_w": -184.60687255859375, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.677117109298706, "rewards_train/1-l": -1.8356635570526123, "rewards_train/1-w": 2.312617778778076, "rewards_train/2-2": 2.529784679412842, "rewards_train/2-w": -1.0505307912826538, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1482813358306885, "rewards_train/margins_1": 2.9897348880767822, "rewards_train/margins_2": 3.5803154706954956, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -228.060546875, "logps_train/policy_1_l": -177.79150390625, "logps_train/policy_1_w": -152.28604125976562, "logps_train/policy_2_2": -156.924072265625, "logps_train/policy_2_w": -239.78512573242188, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -0.9638670682907104, "rewards_train/1-l": -0.9908685684204102, "rewards_train/1-w": 3.1206154823303223, "rewards_train/2-2": 2.9806394577026367, "rewards_train/2-w": -1.8617150783538818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.111484050750732, "rewards_train/margins_1": 4.084482550621033, "rewards_train/margins_2": 4.8423545360565186, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -199.07022094726562, "logps_train/policy_1_l": -165.93612670898438, "logps_train/policy_1_w": -164.65431213378906, "logps_train/policy_2_2": -145.30589294433594, "logps_train/policy_2_w": -220.2309112548828, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -0.5444249510765076, "rewards_train/1-l": -1.4210726022720337, "rewards_train/1-w": 3.0520005226135254, "rewards_train/2-2": 2.717848300933838, "rewards_train/2-w": -0.29203659296035767, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.473073124885559, "rewards_train/margins_1": 3.596425473690033, "rewards_train/margins_2": 3.0098848938941956, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -135.14337158203125, "logps_train/policy_1_l": -107.36164855957031, "logps_train/policy_1_w": -78.91398620605469, "logps_train/policy_2_2": -90.17295837402344, "logps_train/policy_2_w": -107.90515899658203, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": -1.0694150924682617, "rewards_train/1-l": -1.5742506980895996, "rewards_train/1-w": 1.099031686782837, "rewards_train/2-2": 1.8362197875976562, "rewards_train/2-w": -0.39325010776519775, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.6732823848724365, "rewards_train/margins_1": 2.1684467792510986, "rewards_train/margins_2": 2.229469895362854, "step": 236 }, { "epoch": 0.71, "logps_train/policy_1_2": -248.25054931640625, "logps_train/policy_1_l": -204.74342346191406, "logps_train/policy_1_w": -126.69999694824219, "logps_train/policy_2_2": -165.1510009765625, "logps_train/policy_2_w": -197.75515747070312, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -1.8219313621520996, "rewards_train/1-l": -1.8871344327926636, "rewards_train/1-w": 2.6759228706359863, "rewards_train/2-2": 3.3684937953948975, "rewards_train/2-w": -1.5058866739273071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.56305730342865, "rewards_train/margins_1": 4.497854232788086, "rewards_train/margins_2": 4.874380469322205, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -162.73779296875, "logps_train/policy_1_l": -186.74000549316406, "logps_train/policy_1_w": -172.5855255126953, "logps_train/policy_2_2": -116.62886047363281, "logps_train/policy_2_w": -231.3048095703125, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -0.33706140518188477, "rewards_train/1-l": -1.5654067993164062, "rewards_train/1-w": 3.516448497772217, "rewards_train/2-2": 2.1976613998413086, "rewards_train/2-w": 0.0023312270641326904, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.081855297088623, "rewards_train/margins_1": 3.8535099029541016, "rewards_train/margins_2": 2.195330172777176, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -233.34669494628906, "logps_train/policy_1_l": -219.682861328125, "logps_train/policy_1_w": -145.76321411132812, "logps_train/policy_2_2": -144.63136291503906, "logps_train/policy_2_w": -243.68907165527344, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -2.098731517791748, "rewards_train/1-l": -1.2362550497055054, "rewards_train/1-w": 2.605710506439209, "rewards_train/2-2": 2.799363613128662, "rewards_train/2-w": -2.426720142364502, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8419655561447144, "rewards_train/margins_1": 4.704442024230957, "rewards_train/margins_2": 5.226083755493164, "step": 237 }, { "epoch": 0.71, "logps_train/policy_1_2": -232.1734619140625, "logps_train/policy_1_l": -260.98468017578125, "logps_train/policy_1_w": -200.117919921875, "logps_train/policy_2_2": -177.85073852539062, "logps_train/policy_2_w": -273.171875, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -239.0, "logps_train/ref_1_w": -235.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": -0.1939089596271515, "rewards_train/1-l": -2.1547210216522217, "rewards_train/1-w": 3.4928956031799316, "rewards_train/2-2": 3.2297701835632324, "rewards_train/2-w": -0.49062812328338623, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.647616624832153, "rewards_train/margins_1": 3.686804562807083, "rewards_train/margins_2": 3.7203983068466187, "step": 237 }, { "epoch": 0.71, "learning_rate": 3.834558444911978e-06, "loss": 0.9533, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -148.91049194335938, "logps_train/policy_1_l": -140.76028442382812, "logps_train/policy_1_w": -91.73300170898438, "logps_train/policy_2_2": -104.84532165527344, "logps_train/policy_2_w": -134.82225036621094, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -0.6930018663406372, "rewards_train/1-l": -2.0108554363250732, "rewards_train/1-w": 2.4575586318969727, "rewards_train/2-2": 2.040468215942383, "rewards_train/2-w": -0.271677166223526, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.468414068222046, "rewards_train/margins_1": 3.15056049823761, "rewards_train/margins_2": 2.312145382165909, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -242.640625, "logps_train/policy_1_l": -224.09727478027344, "logps_train/policy_1_w": -146.13116455078125, "logps_train/policy_2_2": -155.917724609375, "logps_train/policy_2_w": -219.80540466308594, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -1.465624213218689, "rewards_train/1-l": -1.990586757659912, "rewards_train/1-w": 2.87516450881958, "rewards_train/2-2": 3.379711151123047, "rewards_train/2-w": -1.2688212394714355, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.865751266479492, "rewards_train/margins_1": 4.340788722038269, "rewards_train/margins_2": 4.648532390594482, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -183.9181365966797, "logps_train/policy_1_l": -145.79034423828125, "logps_train/policy_1_w": -131.5709686279297, "logps_train/policy_2_2": -122.29743194580078, "logps_train/policy_2_w": -196.82391357421875, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.3406418561935425, "rewards_train/1-l": -1.9075489044189453, "rewards_train/1-w": 2.5450520515441895, "rewards_train/2-2": 2.1024832725524902, "rewards_train/2-w": -0.6937207579612732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.452600955963135, "rewards_train/margins_1": 3.885693907737732, "rewards_train/margins_2": 2.7962040305137634, "step": 238 }, { "epoch": 0.71, "logps_train/policy_1_2": -159.7161865234375, "logps_train/policy_1_l": -146.53065490722656, "logps_train/policy_1_w": -124.9783935546875, "logps_train/policy_2_2": -107.52973175048828, "logps_train/policy_2_w": -184.75152587890625, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -0.6942753195762634, "rewards_train/1-l": -1.7196667194366455, "rewards_train/1-w": 2.678723096847534, "rewards_train/2-2": 2.4710497856140137, "rewards_train/2-w": -0.906597375869751, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.39838981628418, "rewards_train/margins_1": 3.3729984164237976, "rewards_train/margins_2": 3.3776471614837646, "step": 238 }, { "epoch": 0.72, "logps_train/policy_1_2": -171.7362060546875, "logps_train/policy_1_l": -167.3579559326172, "logps_train/policy_1_w": -127.278564453125, "logps_train/policy_2_2": -119.76473999023438, "logps_train/policy_2_w": -182.3020782470703, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.5622933506965637, "rewards_train/1-l": -1.2834522724151611, "rewards_train/1-w": 2.5002689361572266, "rewards_train/2-2": 2.145792007446289, "rewards_train/2-w": -0.7825513482093811, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7837212085723877, "rewards_train/margins_1": 3.0625622868537903, "rewards_train/margins_2": 2.92834335565567, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -216.44644165039062, "logps_train/policy_1_l": -225.96771240234375, "logps_train/policy_1_w": -145.26943969726562, "logps_train/policy_2_2": -142.8117218017578, "logps_train/policy_2_w": -229.62222290039062, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.774330973625183, "rewards_train/1-l": -2.0898265838623047, "rewards_train/1-w": 2.519063949584961, "rewards_train/2-2": 2.4360156059265137, "rewards_train/2-w": -2.3458776473999023, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.608890533447266, "rewards_train/margins_1": 4.293394923210144, "rewards_train/margins_2": 4.781893253326416, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -203.971435546875, "logps_train/policy_1_l": -238.03477478027344, "logps_train/policy_1_w": -170.19451904296875, "logps_train/policy_2_2": -136.001708984375, "logps_train/policy_2_w": -245.22996520996094, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": -1.5471429824829102, "rewards_train/1-l": -2.056553840637207, "rewards_train/1-w": 3.7383596897125244, "rewards_train/2-2": 2.7177977561950684, "rewards_train/2-w": -0.7351052165031433, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.7949135303497314, "rewards_train/margins_1": 5.285502672195435, "rewards_train/margins_2": 3.4529029726982117, "step": 239 }, { "epoch": 0.72, "logps_train/policy_1_2": -200.47509765625, "logps_train/policy_1_l": -160.81011962890625, "logps_train/policy_1_w": -115.42001342773438, "logps_train/policy_2_2": -138.45828247070312, "logps_train/policy_2_w": -168.27993774414062, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.6133301258087158, "rewards_train/1-l": -1.6535710096359253, "rewards_train/1-w": 2.420742988586426, "rewards_train/2-2": 2.406125068664551, "rewards_train/2-w": -0.5605135560035706, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.074313998222351, "rewards_train/margins_1": 4.034073114395142, "rewards_train/margins_2": 2.9666386246681213, "step": 239 }, { "epoch": 0.72, "learning_rate": 3.8136090823685156e-06, "loss": 1.1066, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -163.17897033691406, "logps_train/policy_1_l": -150.4315185546875, "logps_train/policy_1_w": -131.398681640625, "logps_train/policy_2_2": -116.562744140625, "logps_train/policy_2_w": -196.169189453125, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": 0.25515085458755493, "rewards_train/1-l": -1.4117071628570557, "rewards_train/1-w": 2.8577873706817627, "rewards_train/2-2": 2.6661863327026367, "rewards_train/2-w": -0.9161393046379089, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.269494533538818, "rewards_train/margins_1": 2.6026365160942078, "rewards_train/margins_2": 3.5823256373405457, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -280.4395751953125, "logps_train/policy_1_l": -202.2923126220703, "logps_train/policy_1_w": -136.15521240234375, "logps_train/policy_2_2": -199.05169677734375, "logps_train/policy_2_w": -223.2572784423828, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.7797023057937622, "rewards_train/1-l": -2.10591459274292, "rewards_train/1-w": 3.15205717086792, "rewards_train/2-2": 3.4645090103149414, "rewards_train/2-w": -1.984322190284729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.25797176361084, "rewards_train/margins_1": 4.931759476661682, "rewards_train/margins_2": 5.44883120059967, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -208.81784057617188, "logps_train/policy_1_l": -164.11195373535156, "logps_train/policy_1_w": -181.45672607421875, "logps_train/policy_2_2": -158.5016632080078, "logps_train/policy_2_w": -232.35726928710938, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -0.5435030460357666, "rewards_train/1-l": -1.4913709163665771, "rewards_train/1-w": 3.0525708198547363, "rewards_train/2-2": 2.7326462268829346, "rewards_train/2-w": -0.13533595204353333, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5439417362213135, "rewards_train/margins_1": 3.596073865890503, "rewards_train/margins_2": 2.867982178926468, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -134.18576049804688, "logps_train/policy_1_l": -111.3178482055664, "logps_train/policy_1_w": -69.59149932861328, "logps_train/policy_2_2": -90.57207489013672, "logps_train/policy_2_w": -96.41590881347656, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -78.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": -0.6883020997047424, "rewards_train/1-l": -1.3767802715301514, "rewards_train/1-w": 0.9125299453735352, "rewards_train/2-2": 1.9069523811340332, "rewards_train/2-w": -0.4451064467430115, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.2893102169036865, "rewards_train/margins_1": 1.6008320450782776, "rewards_train/margins_2": 2.3520588278770447, "step": 240 }, { "epoch": 0.72, "logps_train/policy_1_2": -156.22703552246094, "logps_train/policy_1_l": -180.22373962402344, "logps_train/policy_1_w": -112.88478088378906, "logps_train/policy_2_2": -109.90026092529297, "logps_train/policy_2_w": -162.05343627929688, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -0.46274247765541077, "rewards_train/1-l": -2.3149514198303223, "rewards_train/1-w": 1.9166005849838257, "rewards_train/2-2": 1.9859505891799927, "rewards_train/2-w": -1.2194061279296875, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.231552004814148, "rewards_train/margins_1": 2.3793430626392365, "rewards_train/margins_2": 3.20535671710968, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -167.26641845703125, "logps_train/policy_1_l": -163.50901794433594, "logps_train/policy_1_w": -124.0607681274414, "logps_train/policy_2_2": -106.27972412109375, "logps_train/policy_2_w": -176.89161682128906, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.4340639114379883, "rewards_train/1-l": -1.364403486251831, "rewards_train/1-w": 2.5400166511535645, "rewards_train/2-2": 2.2005436420440674, "rewards_train/2-w": -0.5040061473846436, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.9044201374053955, "rewards_train/margins_1": 3.9740805625915527, "rewards_train/margins_2": 2.704549789428711, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -186.8341064453125, "logps_train/policy_1_l": -173.35833740234375, "logps_train/policy_1_w": -93.77546691894531, "logps_train/policy_2_2": -121.48081970214844, "logps_train/policy_2_w": -152.03102111816406, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.3623157739639282, "rewards_train/1-l": -2.6169867515563965, "rewards_train/1-w": 2.0968680381774902, "rewards_train/2-2": 2.5030903816223145, "rewards_train/2-w": -1.2269301414489746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.713854789733887, "rewards_train/margins_1": 3.4591838121414185, "rewards_train/margins_2": 3.730020523071289, "step": 241 }, { "epoch": 0.72, "logps_train/policy_1_2": -145.32937622070312, "logps_train/policy_1_l": -127.53195190429688, "logps_train/policy_1_w": -82.57292938232422, "logps_train/policy_2_2": -93.87249755859375, "logps_train/policy_2_w": -129.06942749023438, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": -1.1534459590911865, "rewards_train/1-l": -1.5711004734039307, "rewards_train/1-w": 1.9693119525909424, "rewards_train/2-2": 2.276031255722046, "rewards_train/2-w": -0.6800873279571533, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.540412425994873, "rewards_train/margins_1": 3.122757911682129, "rewards_train/margins_2": 2.956118583679199, "step": 241 }, { "epoch": 0.72, "learning_rate": 3.792531513894365e-06, "loss": 1.1706, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -220.1220703125, "logps_train/policy_1_l": -198.427978515625, "logps_train/policy_1_w": -161.519287109375, "logps_train/policy_2_2": -160.89364624023438, "logps_train/policy_2_w": -227.29769897460938, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -0.948925793170929, "rewards_train/1-l": -2.0345466136932373, "rewards_train/1-w": 2.4845938682556152, "rewards_train/2-2": 2.786417007446289, "rewards_train/2-w": -1.3253751993179321, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5191404819488525, "rewards_train/margins_1": 3.433519661426544, "rewards_train/margins_2": 4.111792206764221, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -187.5538787841797, "logps_train/policy_1_l": -138.66726684570312, "logps_train/policy_1_w": -116.65158081054688, "logps_train/policy_2_2": -123.11734008789062, "logps_train/policy_2_w": -183.95533752441406, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.144792079925537, "rewards_train/1-l": -1.5684843063354492, "rewards_train/1-w": 2.754373073577881, "rewards_train/2-2": 2.313021659851074, "rewards_train/2-w": -1.0586196184158325, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.32285737991333, "rewards_train/margins_1": 3.899165153503418, "rewards_train/margins_2": 3.3716412782669067, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -248.3643798828125, "logps_train/policy_1_l": -237.20932006835938, "logps_train/policy_1_w": -135.95501708984375, "logps_train/policy_2_2": -181.38011169433594, "logps_train/policy_2_w": -198.25494384765625, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -217.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -0.7196416854858398, "rewards_train/1-l": -2.048764705657959, "rewards_train/1-w": 2.3752026557922363, "rewards_train/2-2": 3.5006613731384277, "rewards_train/2-w": -1.312995195388794, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.423967361450195, "rewards_train/margins_1": 3.094844341278076, "rewards_train/margins_2": 4.813656568527222, "step": 242 }, { "epoch": 0.72, "logps_train/policy_1_2": -176.19979858398438, "logps_train/policy_1_l": -135.09120178222656, "logps_train/policy_1_w": -104.42768096923828, "logps_train/policy_2_2": -119.54856872558594, "logps_train/policy_2_w": -143.89813232421875, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.4545499086380005, "rewards_train/1-l": -1.439882516860962, "rewards_train/1-w": 1.5753960609436035, "rewards_train/2-2": 1.9252214431762695, "rewards_train/2-w": -0.5244808793067932, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.0152785778045654, "rewards_train/margins_1": 3.029945969581604, "rewards_train/margins_2": 2.4497023224830627, "step": 242 }, { "epoch": 0.73, "logps_train/policy_1_2": -209.97012329101562, "logps_train/policy_1_l": -190.52420043945312, "logps_train/policy_1_w": -176.39939880371094, "logps_train/policy_2_2": -155.12939453125, "logps_train/policy_2_w": -237.79324340820312, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -1.1181066036224365, "rewards_train/1-l": -1.3801054954528809, "rewards_train/1-w": 2.916701078414917, "rewards_train/2-2": 2.203857183456421, "rewards_train/2-w": -0.8716095685958862, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.296806573867798, "rewards_train/margins_1": 4.0348076820373535, "rewards_train/margins_2": 3.075466752052307, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -172.0023651123047, "logps_train/policy_1_l": -155.79409790039062, "logps_train/policy_1_w": -142.623779296875, "logps_train/policy_2_2": -116.96961975097656, "logps_train/policy_2_w": -210.72311401367188, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.0619549751281738, "rewards_train/1-l": -0.7909332513809204, "rewards_train/1-w": 2.7407476902008057, "rewards_train/2-2": 2.192295789718628, "rewards_train/2-w": -1.3508272171020508, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.531680941581726, "rewards_train/margins_1": 3.8027026653289795, "rewards_train/margins_2": 3.5431230068206787, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -158.66050720214844, "logps_train/policy_1_l": -159.9781036376953, "logps_train/policy_1_w": -120.99661254882812, "logps_train/policy_2_2": -114.48660278320312, "logps_train/policy_2_w": -175.38772583007812, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.5156601071357727, "rewards_train/1-l": -1.9559162855148315, "rewards_train/1-w": 2.6261203289031982, "rewards_train/2-2": 2.0540740489959717, "rewards_train/2-w": -0.5458039045333862, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.58203661441803, "rewards_train/margins_1": 3.141780436038971, "rewards_train/margins_2": 2.599877953529358, "step": 243 }, { "epoch": 0.73, "logps_train/policy_1_2": -107.24897003173828, "logps_train/policy_1_l": -118.64978790283203, "logps_train/policy_1_w": -112.55496978759766, "logps_train/policy_2_2": -79.02538299560547, "logps_train/policy_2_w": -152.65699768066406, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.7331000566482544, "rewards_train/1-l": -1.2108280658721924, "rewards_train/1-w": 1.8751673698425293, "rewards_train/2-2": 0.8931647539138794, "rewards_train/2-w": -0.693825364112854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.0859954357147217, "rewards_train/margins_1": 2.6082674264907837, "rewards_train/margins_2": 1.5869901180267334, "step": 243 }, { "epoch": 0.73, "learning_rate": 3.7713277966230514e-06, "loss": 1.0993, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -237.19677734375, "logps_train/policy_1_l": -202.04298400878906, "logps_train/policy_1_w": -150.66146850585938, "logps_train/policy_2_2": -148.35403442382812, "logps_train/policy_2_w": -240.55685424804688, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -2.5368642807006836, "rewards_train/1-l": -2.173476219177246, "rewards_train/1-w": 2.595571517944336, "rewards_train/2-2": 2.8111777305603027, "rewards_train/2-w": -2.0537326335906982, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.769047737121582, "rewards_train/margins_1": 5.1324357986450195, "rewards_train/margins_2": 4.864910364151001, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -172.35427856445312, "logps_train/policy_1_l": -151.57289123535156, "logps_train/policy_1_w": -109.52998352050781, "logps_train/policy_2_2": -115.63107299804688, "logps_train/policy_2_w": -158.37301635742188, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.2174586057662964, "rewards_train/1-l": -1.843909740447998, "rewards_train/1-w": 1.80569326877594, "rewards_train/2-2": 2.068142890930176, "rewards_train/2-w": -1.0819309949874878, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.649603009223938, "rewards_train/margins_1": 3.0231518745422363, "rewards_train/margins_2": 3.1500738859176636, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -197.6853790283203, "logps_train/policy_1_l": -201.98504638671875, "logps_train/policy_1_w": -152.0240478515625, "logps_train/policy_2_2": -133.56643676757812, "logps_train/policy_2_w": -236.97586059570312, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.3158035278320312, "rewards_train/1-l": -2.5449888706207275, "rewards_train/1-w": 2.969470500946045, "rewards_train/2-2": 2.3761680126190186, "rewards_train/2-w": -2.288992166519165, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.5144593715667725, "rewards_train/margins_1": 4.285274028778076, "rewards_train/margins_2": 4.665160179138184, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -92.26026916503906, "logps_train/policy_1_l": -97.66928100585938, "logps_train/policy_1_w": -68.31351470947266, "logps_train/policy_2_2": -60.584991455078125, "logps_train/policy_2_w": -101.92291259765625, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": -0.533058226108551, "rewards_train/1-l": -1.1278529167175293, "rewards_train/1-w": 1.4307576417922974, "rewards_train/2-2": 1.2576137781143188, "rewards_train/2-w": -0.5279361009597778, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.5586105585098267, "rewards_train/margins_1": 1.9638158679008484, "rewards_train/margins_2": 1.7855498790740967, "step": 244 }, { "epoch": 0.73, "logps_train/policy_1_2": -184.91392517089844, "logps_train/policy_1_l": -172.15945434570312, "logps_train/policy_1_w": -138.06826782226562, "logps_train/policy_2_2": -120.20356750488281, "logps_train/policy_2_w": -196.94317626953125, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -1.3714700937271118, "rewards_train/1-l": -1.5940953493118286, "rewards_train/1-w": 2.298447370529175, "rewards_train/2-2": 2.435502052307129, "rewards_train/2-w": -1.1950981616973877, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8925427198410034, "rewards_train/margins_1": 3.6699174642562866, "rewards_train/margins_2": 3.6306002140045166, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -153.01348876953125, "logps_train/policy_1_l": -174.7246551513672, "logps_train/policy_1_w": -126.5228271484375, "logps_train/policy_2_2": -103.97575378417969, "logps_train/policy_2_w": -191.1104736328125, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.151349425315857, "rewards_train/1-l": -2.2156288623809814, "rewards_train/1-w": 2.339905261993408, "rewards_train/2-2": 1.758479356765747, "rewards_train/2-w": -1.2403438091278076, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.55553412437439, "rewards_train/margins_1": 3.491254687309265, "rewards_train/margins_2": 2.9988231658935547, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -195.43881225585938, "logps_train/policy_1_l": -159.27171325683594, "logps_train/policy_1_w": -97.35352325439453, "logps_train/policy_2_2": -135.1912078857422, "logps_train/policy_2_w": -140.47665405273438, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -1.4575529098510742, "rewards_train/1-l": -2.0679917335510254, "rewards_train/1-w": 1.7634751796722412, "rewards_train/2-2": 2.2062690258026123, "rewards_train/2-w": -0.8289154171943665, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8314669132232666, "rewards_train/margins_1": 3.2210280895233154, "rewards_train/margins_2": 3.0351844429969788, "step": 245 }, { "epoch": 0.73, "logps_train/policy_1_2": -213.02255249023438, "logps_train/policy_1_l": -201.3686065673828, "logps_train/policy_1_w": -151.40155029296875, "logps_train/policy_2_2": -141.2021484375, "logps_train/policy_2_w": -239.89486694335938, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -1.977253794670105, "rewards_train/1-l": -1.8227975368499756, "rewards_train/1-w": 2.957892417907715, "rewards_train/2-2": 2.4040045738220215, "rewards_train/2-w": -2.2894859313964844, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.78068995475769, "rewards_train/margins_1": 4.93514621257782, "rewards_train/margins_2": 4.693490505218506, "step": 245 }, { "epoch": 0.74, "learning_rate": 3.7500000000000005e-06, "loss": 0.9411, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -208.1610107421875, "logps_train/policy_1_l": -207.87786865234375, "logps_train/policy_1_w": -116.7721939086914, "logps_train/policy_2_2": -141.79122924804688, "logps_train/policy_2_w": -181.1646728515625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.3754773139953613, "rewards_train/1-l": -2.4192559719085693, "rewards_train/1-w": 2.2993431091308594, "rewards_train/2-2": 2.3661887645721436, "rewards_train/2-w": -1.3426387310028076, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.718599081039429, "rewards_train/margins_1": 3.6748204231262207, "rewards_train/margins_2": 3.708827495574951, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -169.4623260498047, "logps_train/policy_1_l": -171.82925415039062, "logps_train/policy_1_w": -160.4573974609375, "logps_train/policy_2_2": -128.33602905273438, "logps_train/policy_2_w": -213.1964111328125, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -0.7632250189781189, "rewards_train/1-l": -2.05069899559021, "rewards_train/1-w": 2.226768970489502, "rewards_train/2-2": 1.714834451675415, "rewards_train/2-w": -0.9299914836883545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.277467966079712, "rewards_train/margins_1": 2.989993989467621, "rewards_train/margins_2": 2.6448259353637695, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -163.8354034423828, "logps_train/policy_1_l": -146.0919189453125, "logps_train/policy_1_w": -94.02256774902344, "logps_train/policy_2_2": -114.7198486328125, "logps_train/policy_2_w": -148.3955078125, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.0390093326568604, "rewards_train/1-l": -1.7531135082244873, "rewards_train/1-w": 1.793738842010498, "rewards_train/2-2": 1.8986202478408813, "rewards_train/2-w": -1.0006828308105469, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5468523502349854, "rewards_train/margins_1": 2.8327481746673584, "rewards_train/margins_2": 2.8993030786514282, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -165.50283813476562, "logps_train/policy_1_l": -195.89471435546875, "logps_train/policy_1_w": -145.44456481933594, "logps_train/policy_2_2": -110.05340576171875, "logps_train/policy_2_w": -212.95213317871094, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -1.4034078121185303, "rewards_train/1-l": -1.7800965309143066, "rewards_train/1-w": 2.485816478729248, "rewards_train/2-2": 2.021904945373535, "rewards_train/2-w": -1.4113264083862305, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.265913009643555, "rewards_train/margins_1": 3.8892242908477783, "rewards_train/margins_2": 3.4332313537597656, "step": 246 }, { "epoch": 0.74, "logps_train/policy_1_2": -234.94326782226562, "logps_train/policy_1_l": -198.087158203125, "logps_train/policy_1_w": -144.97393798828125, "logps_train/policy_2_2": -164.41534423828125, "logps_train/policy_2_w": -201.18798828125, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -1.5626857280731201, "rewards_train/1-l": -2.206005811691284, "rewards_train/1-w": 2.3666939735412598, "rewards_train/2-2": 2.9006528854370117, "rewards_train/2-w": -0.9027830958366394, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.572699785232544, "rewards_train/margins_1": 3.92937970161438, "rewards_train/margins_2": 3.803435981273651, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -224.51309204101562, "logps_train/policy_1_l": -162.78057861328125, "logps_train/policy_1_w": -124.54922485351562, "logps_train/policy_2_2": -159.78489685058594, "logps_train/policy_2_w": -181.1860809326172, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.625529408454895, "rewards_train/1-l": -1.1682933568954468, "rewards_train/1-w": 2.5140233039855957, "rewards_train/2-2": 3.2539310455322266, "rewards_train/2-w": -0.8564992547035217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6823166608810425, "rewards_train/margins_1": 3.1395527124404907, "rewards_train/margins_2": 4.110430300235748, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -190.01828002929688, "logps_train/policy_1_l": -140.495849609375, "logps_train/policy_1_w": -115.46955108642578, "logps_train/policy_2_2": -130.27813720703125, "logps_train/policy_2_w": -175.18101501464844, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.4811261892318726, "rewards_train/1-l": -1.951539158821106, "rewards_train/1-w": 1.943962812423706, "rewards_train/2-2": 2.0399842262268066, "rewards_train/2-w": -1.385288119316101, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.895501971244812, "rewards_train/margins_1": 3.4250890016555786, "rewards_train/margins_2": 3.4252723455429077, "step": 247 }, { "epoch": 0.74, "logps_train/policy_1_2": -199.2274169921875, "logps_train/policy_1_l": -215.768798828125, "logps_train/policy_1_w": -169.6462860107422, "logps_train/policy_2_2": -141.6120147705078, "logps_train/policy_2_w": -248.9739990234375, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": -1.0962774753570557, "rewards_train/1-l": -2.108250856399536, "rewards_train/1-w": 2.8657190799713135, "rewards_train/2-2": 2.189286708831787, "rewards_train/2-w": -1.637048602104187, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.97396993637085, "rewards_train/margins_1": 3.961996555328369, "rewards_train/margins_2": 3.826335310935974, "step": 247 }, { "epoch": 0.74, "learning_rate": 3.728550205580564e-06, "loss": 0.9611, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -206.26693725585938, "logps_train/policy_1_l": -180.8904571533203, "logps_train/policy_1_w": -156.81854248046875, "logps_train/policy_2_2": -141.97189331054688, "logps_train/policy_2_w": -222.66326904296875, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.784116506576538, "rewards_train/1-l": -1.9201014041900635, "rewards_train/1-w": 2.639629602432251, "rewards_train/2-2": 2.0641391277313232, "rewards_train/2-w": -1.6130070686340332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.5597310066223145, "rewards_train/margins_1": 4.423746109008789, "rewards_train/margins_2": 3.6771461963653564, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -181.74359130859375, "logps_train/policy_1_l": -193.68295288085938, "logps_train/policy_1_w": -149.90957641601562, "logps_train/policy_2_2": -124.74067687988281, "logps_train/policy_2_w": -220.74786376953125, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -0.9466239809989929, "rewards_train/1-l": -1.7940765619277954, "rewards_train/1-w": 2.9672446250915527, "rewards_train/2-2": 2.325150489807129, "rewards_train/2-w": -1.3450987339019775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.761321187019348, "rewards_train/margins_1": 3.9138686060905457, "rewards_train/margins_2": 3.6702492237091064, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -209.3478240966797, "logps_train/policy_1_l": -225.2131805419922, "logps_train/policy_1_w": -160.2301025390625, "logps_train/policy_2_2": -153.24163818359375, "logps_train/policy_2_w": -211.01336669921875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.1847827434539795, "rewards_train/1-l": -2.706864833831787, "rewards_train/1-w": 2.3699593544006348, "rewards_train/2-2": 2.446929931640625, "rewards_train/2-w": -0.819305419921875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.076824188232422, "rewards_train/margins_1": 3.5547420978546143, "rewards_train/margins_2": 3.2662353515625, "step": 248 }, { "epoch": 0.74, "logps_train/policy_1_2": -156.80712890625, "logps_train/policy_1_l": -164.04185485839844, "logps_train/policy_1_w": -91.8908462524414, "logps_train/policy_2_2": -105.3797607421875, "logps_train/policy_2_w": -132.9621124267578, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": -0.5012196898460388, "rewards_train/1-l": -1.597691535949707, "rewards_train/1-w": 1.9597433805465698, "rewards_train/2-2": 2.0714964866638184, "rewards_train/2-w": -0.6269727349281311, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.557434916496277, "rewards_train/margins_1": 2.4609630703926086, "rewards_train/margins_2": 2.6984692215919495, "step": 248 }, { "epoch": 0.75, "logps_train/policy_1_2": -187.09774780273438, "logps_train/policy_1_l": -171.10302734375, "logps_train/policy_1_w": -132.2066192626953, "logps_train/policy_2_2": -122.49046325683594, "logps_train/policy_2_w": -207.39743041992188, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.5402445793151855, "rewards_train/1-l": -1.8508802652359009, "rewards_train/1-w": 2.6703531742095947, "rewards_train/2-2": 2.233766555786133, "rewards_train/2-w": -1.6665008068084717, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.521233439445496, "rewards_train/margins_1": 4.21059775352478, "rewards_train/margins_2": 3.9002673625946045, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -257.80181884765625, "logps_train/policy_1_l": -176.5245361328125, "logps_train/policy_1_w": -148.19976806640625, "logps_train/policy_2_2": -191.82286071777344, "logps_train/policy_2_w": -197.28709411621094, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -1.1286191940307617, "rewards_train/1-l": -1.730187177658081, "rewards_train/1-w": 2.6300227642059326, "rewards_train/2-2": 2.802088737487793, "rewards_train/2-w": -0.22089648246765137, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.360209941864014, "rewards_train/margins_1": 3.7586419582366943, "rewards_train/margins_2": 3.0229852199554443, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -167.1641387939453, "logps_train/policy_1_l": -180.3472137451172, "logps_train/policy_1_w": -111.61691284179688, "logps_train/policy_2_2": -110.79656982421875, "logps_train/policy_2_w": -169.597412109375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.2780349254608154, "rewards_train/1-l": -2.1763722896575928, "rewards_train/1-w": 1.931960940361023, "rewards_train/2-2": 2.1722958087921143, "rewards_train/2-w": -1.3909921646118164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.108333230018616, "rewards_train/margins_1": 3.2099958658218384, "rewards_train/margins_2": 3.5632879734039307, "step": 249 }, { "epoch": 0.75, "logps_train/policy_1_2": -145.703857421875, "logps_train/policy_1_l": -103.49176025390625, "logps_train/policy_1_w": -103.21684265136719, "logps_train/policy_2_2": -96.57908630371094, "logps_train/policy_2_w": -153.84698486328125, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.8977289199829102, "rewards_train/1-l": -1.2016657590866089, "rewards_train/1-w": 2.114253282546997, "rewards_train/2-2": 1.9850599765777588, "rewards_train/2-w": -0.8331351280212402, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.315919041633606, "rewards_train/margins_1": 3.0119822025299072, "rewards_train/margins_2": 2.818195104598999, "step": 249 }, { "epoch": 0.75, "learning_rate": 3.7069805068268626e-06, "loss": 0.8957, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -185.73654174804688, "logps_train/policy_1_l": -210.27304077148438, "logps_train/policy_1_w": -154.34365844726562, "logps_train/policy_2_2": -123.56156921386719, "logps_train/policy_2_w": -243.2815704345703, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -1.0603728294372559, "rewards_train/1-l": -2.502694606781006, "rewards_train/1-w": 3.261727809906006, "rewards_train/2-2": 2.4844677448272705, "rewards_train/2-w": -2.2805001735687256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.764422416687012, "rewards_train/margins_1": 4.322100639343262, "rewards_train/margins_2": 4.764967918395996, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -200.52490234375, "logps_train/policy_1_l": -166.80728149414062, "logps_train/policy_1_w": -118.37360382080078, "logps_train/policy_2_2": -137.72030639648438, "logps_train/policy_2_w": -179.3617706298828, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.4185535907745361, "rewards_train/1-l": -2.124478340148926, "rewards_train/1-w": 2.238029718399048, "rewards_train/2-2": 2.5553250312805176, "rewards_train/2-w": -1.4124958515167236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.362508058547974, "rewards_train/margins_1": 3.656583309173584, "rewards_train/margins_2": 3.967820882797241, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -232.89598083496094, "logps_train/policy_1_l": -172.1414794921875, "logps_train/policy_1_w": -131.40426635742188, "logps_train/policy_2_2": -164.9555206298828, "logps_train/policy_2_w": -189.90164184570312, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.8649897575378418, "rewards_train/1-l": -1.051061749458313, "rewards_train/1-w": 2.3302767276763916, "rewards_train/2-2": 2.573197841644287, "rewards_train/2-w": -1.1393826007843018, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.3813384771347046, "rewards_train/margins_1": 4.195266485214233, "rewards_train/margins_2": 3.712580442428589, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -148.5989990234375, "logps_train/policy_1_l": -162.20205688476562, "logps_train/policy_1_w": -116.46359252929688, "logps_train/policy_2_2": -96.12235260009766, "logps_train/policy_2_w": -181.88113403320312, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.3695673942565918, "rewards_train/1-l": -1.7112442255020142, "rewards_train/1-w": 1.994071364402771, "rewards_train/2-2": 1.913546085357666, "rewards_train/2-w": -1.9240514039993286, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.705315589904785, "rewards_train/margins_1": 3.363638758659363, "rewards_train/margins_2": 3.8375974893569946, "step": 250 }, { "epoch": 0.75, "logps_train/policy_1_2": -159.34957885742188, "logps_train/policy_1_l": -129.04660034179688, "logps_train/policy_1_w": -114.29728698730469, "logps_train/policy_2_2": -102.48859405517578, "logps_train/policy_2_w": -185.89199829101562, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.0822243690490723, "rewards_train/1-l": -1.2070039510726929, "rewards_train/1-w": 2.6062092781066895, "rewards_train/2-2": 2.292938232421875, "rewards_train/2-w": -1.8412511348724365, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8132132291793823, "rewards_train/margins_1": 3.6884336471557617, "rewards_train/margins_2": 4.1341893672943115, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -239.83126831054688, "logps_train/policy_1_l": -230.3516082763672, "logps_train/policy_1_w": -142.91339111328125, "logps_train/policy_2_2": -167.4393768310547, "logps_train/policy_2_w": -203.1488800048828, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.5221912860870361, "rewards_train/1-l": -3.071683883666992, "rewards_train/1-w": 2.47086763381958, "rewards_train/2-2": 3.0560619831085205, "rewards_train/2-w": -0.9234817028045654, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.542551517486572, "rewards_train/margins_1": 3.993058919906616, "rewards_train/margins_2": 3.979543685913086, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -198.71405029296875, "logps_train/policy_1_l": -124.4206771850586, "logps_train/policy_1_w": -125.15514373779297, "logps_train/policy_2_2": -151.5221405029297, "logps_train/policy_2_w": -174.59210205078125, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.0530452728271484, "rewards_train/1-l": -1.5740015506744385, "rewards_train/1-w": 2.3059701919555664, "rewards_train/2-2": 1.6214189529418945, "rewards_train/2-w": -0.35608476400375366, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.879971742630005, "rewards_train/margins_1": 3.359015464782715, "rewards_train/margins_2": 1.9775037169456482, "step": 251 }, { "epoch": 0.75, "logps_train/policy_1_2": -180.7988739013672, "logps_train/policy_1_l": -173.39749145507812, "logps_train/policy_1_w": -140.4578399658203, "logps_train/policy_2_2": -125.59867858886719, "logps_train/policy_2_w": -209.337158203125, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.6177773475646973, "rewards_train/1-l": -2.359279155731201, "rewards_train/1-w": 2.295328140258789, "rewards_train/2-2": 1.7083940505981445, "rewards_train/2-w": -2.2309815883636475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.65460729598999, "rewards_train/margins_1": 3.9131054878234863, "rewards_train/margins_2": 3.939375638961792, "step": 251 }, { "epoch": 0.75, "learning_rate": 3.685293008903471e-06, "loss": 0.8949, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -166.31820678710938, "logps_train/policy_1_l": -156.2781219482422, "logps_train/policy_1_w": -120.60395812988281, "logps_train/policy_2_2": -107.41650390625, "logps_train/policy_2_w": -177.6092529296875, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.738265037536621, "rewards_train/1-l": -2.1376748085021973, "rewards_train/1-w": 2.4767141342163086, "rewards_train/2-2": 2.0052247047424316, "rewards_train/2-w": -1.2156140804290771, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.614388942718506, "rewards_train/margins_1": 4.21497917175293, "rewards_train/margins_2": 3.220838785171509, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -170.42092895507812, "logps_train/policy_1_l": -153.8658447265625, "logps_train/policy_1_w": -94.15081024169922, "logps_train/policy_2_2": -115.03324127197266, "logps_train/policy_2_w": -144.89810180664062, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.607229471206665, "rewards_train/1-l": -1.370666742324829, "rewards_train/1-w": 1.9736883640289307, "rewards_train/2-2": 1.750777006149292, "rewards_train/2-w": -0.7441079616546631, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.3443551063537598, "rewards_train/margins_1": 3.5809178352355957, "rewards_train/margins_2": 2.494884967803955, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -93.49308776855469, "logps_train/policy_1_l": -117.29074096679688, "logps_train/policy_1_w": -88.87360382080078, "logps_train/policy_2_2": -72.49121856689453, "logps_train/policy_2_w": -119.50949096679688, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": 0.3143635094165802, "rewards_train/1-l": -1.1421595811843872, "rewards_train/1-w": 1.447991132736206, "rewards_train/2-2": 1.4942378997802734, "rewards_train/2-w": -0.47809839248657227, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.5901507139205933, "rewards_train/margins_1": 1.1336276233196259, "rewards_train/margins_2": 1.9723362922668457, "step": 252 }, { "epoch": 0.75, "logps_train/policy_1_2": -239.75823974609375, "logps_train/policy_1_l": -214.0992889404297, "logps_train/policy_1_w": -167.65379333496094, "logps_train/policy_2_2": -153.3316650390625, "logps_train/policy_2_w": -251.90029907226562, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": -2.1320748329162598, "rewards_train/1-l": -2.245476484298706, "rewards_train/1-w": 2.7744157314300537, "rewards_train/2-2": 3.2871456146240234, "rewards_train/2-w": -1.9337799549102783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.01989221572876, "rewards_train/margins_1": 4.9064905643463135, "rewards_train/margins_2": 5.220925569534302, "step": 252 }, { "epoch": 0.76, "logps_train/policy_1_2": -182.70248413085938, "logps_train/policy_1_l": -180.1143341064453, "logps_train/policy_1_w": -151.49862670898438, "logps_train/policy_2_2": -122.22256469726562, "logps_train/policy_2_w": -222.50875854492188, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.0960288047790527, "rewards_train/1-l": -2.0801830291748047, "rewards_train/1-w": 2.4159579277038574, "rewards_train/2-2": 2.435166120529175, "rewards_train/2-w": -1.761033058166504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.496140956878662, "rewards_train/margins_1": 3.51198673248291, "rewards_train/margins_2": 4.196199178695679, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -263.6737060546875, "logps_train/policy_1_l": -173.57611083984375, "logps_train/policy_1_w": -147.32955932617188, "logps_train/policy_2_2": -189.01419067382812, "logps_train/policy_2_w": -216.1051483154297, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.1158086061477661, "rewards_train/1-l": -1.459956407546997, "rewards_train/1-w": 2.8674352169036865, "rewards_train/2-2": 2.9329569339752197, "rewards_train/2-w": -1.0995769500732422, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.327391624450684, "rewards_train/margins_1": 3.9832438230514526, "rewards_train/margins_2": 4.032533884048462, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -189.34158325195312, "logps_train/policy_1_l": -149.9814910888672, "logps_train/policy_1_w": -110.47757720947266, "logps_train/policy_2_2": -135.02154541015625, "logps_train/policy_2_w": -149.2401885986328, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -1.3913849592208862, "rewards_train/1-l": -1.2592328786849976, "rewards_train/1-w": 1.570406198501587, "rewards_train/2-2": 2.0365171432495117, "rewards_train/2-w": -0.7458935379981995, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.8296390771865845, "rewards_train/margins_1": 2.961791157722473, "rewards_train/margins_2": 2.782410681247711, "step": 253 }, { "epoch": 0.76, "logps_train/policy_1_2": -137.17294311523438, "logps_train/policy_1_l": -156.72665405273438, "logps_train/policy_1_w": -102.8868408203125, "logps_train/policy_2_2": -91.84684753417969, "logps_train/policy_2_w": -146.85916137695312, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.45909109711647034, "rewards_train/1-l": -1.911922812461853, "rewards_train/1-w": 2.10760498046875, "rewards_train/2-2": 2.2430496215820312, "rewards_train/2-w": -0.6260522603988647, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.019527792930603, "rewards_train/margins_1": 2.5666960775852203, "rewards_train/margins_2": 2.869101881980896, "step": 253 }, { "epoch": 0.76, "learning_rate": 3.6634898284719533e-06, "loss": 1.0315, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -187.37533569335938, "logps_train/policy_1_l": -196.8861083984375, "logps_train/policy_1_w": -132.91015625, "logps_train/policy_2_2": -133.41285705566406, "logps_train/policy_2_w": -193.36077880859375, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -0.2758136987686157, "rewards_train/1-l": -1.7550170421600342, "rewards_train/1-w": 2.290038824081421, "rewards_train/2-2": 3.1098852157592773, "rewards_train/2-w": -0.9485781192779541, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.045055866241455, "rewards_train/margins_1": 2.5658525228500366, "rewards_train/margins_2": 4.0584633350372314, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -136.41050720214844, "logps_train/policy_1_l": -171.12200927734375, "logps_train/policy_1_w": -115.40081787109375, "logps_train/policy_2_2": -97.27261352539062, "logps_train/policy_2_w": -163.49163818359375, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": 0.07379260659217834, "rewards_train/1-l": -1.7979927062988281, "rewards_train/1-w": 2.7006590366363525, "rewards_train/2-2": 2.3301610946655273, "rewards_train/2-w": -0.867597758769989, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.498651742935181, "rewards_train/margins_1": 2.626866430044174, "rewards_train/margins_2": 3.1977588534355164, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -168.2965087890625, "logps_train/policy_1_l": -154.47389221191406, "logps_train/policy_1_w": -138.62127685546875, "logps_train/policy_2_2": -102.88863372802734, "logps_train/policy_2_w": -191.36019897460938, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.3171508312225342, "rewards_train/1-l": -2.3676891326904297, "rewards_train/1-w": 1.8814269304275513, "rewards_train/2-2": 2.5435585975646973, "rewards_train/2-w": -1.2024266719818115, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.249116063117981, "rewards_train/margins_1": 3.1985777616500854, "rewards_train/margins_2": 3.745985269546509, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -207.79742431640625, "logps_train/policy_1_l": -202.45510864257812, "logps_train/policy_1_w": -132.65106201171875, "logps_train/policy_2_2": -138.36761474609375, "logps_train/policy_2_w": -195.22865295410156, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -1.491851806640625, "rewards_train/1-l": -2.6701202392578125, "rewards_train/1-w": 2.0609190464019775, "rewards_train/2-2": 2.747614860534668, "rewards_train/2-w": -1.4154436588287354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.73103928565979, "rewards_train/margins_1": 3.5527708530426025, "rewards_train/margins_2": 4.163058519363403, "step": 254 }, { "epoch": 0.76, "logps_train/policy_1_2": -86.97949981689453, "logps_train/policy_1_l": -89.30886840820312, "logps_train/policy_1_w": -98.6955337524414, "logps_train/policy_2_2": -66.29817199707031, "logps_train/policy_2_w": -140.13502502441406, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -78.5, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": 0.029003269970417023, "rewards_train/1-l": -1.1064729690551758, "rewards_train/1-w": 2.4628686904907227, "rewards_train/2-2": 1.0590503215789795, "rewards_train/2-w": -0.2816670536994934, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.5693416595458984, "rewards_train/margins_1": 2.4338654205203056, "rewards_train/margins_2": 1.340717375278473, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -161.91787719726562, "logps_train/policy_1_l": -165.30947875976562, "logps_train/policy_1_w": -130.30929565429688, "logps_train/policy_2_2": -113.66111755371094, "logps_train/policy_2_w": -193.72857666015625, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.0794830322265625, "rewards_train/1-l": -1.5146150588989258, "rewards_train/1-w": 2.185183048248291, "rewards_train/2-2": 2.119044542312622, "rewards_train/2-w": -1.3337957859039307, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.699798107147217, "rewards_train/margins_1": 3.2646660804748535, "rewards_train/margins_2": 3.4528403282165527, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -196.37652587890625, "logps_train/policy_1_l": -194.1375274658203, "logps_train/policy_1_w": -115.60417175292969, "logps_train/policy_2_2": -142.16818237304688, "logps_train/policy_2_w": -165.45120239257812, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.0259335041046143, "rewards_train/1-l": -2.792269706726074, "rewards_train/1-w": 2.1296215057373047, "rewards_train/2-2": 2.4406044483184814, "rewards_train/2-w": -0.8810582160949707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.921891212463379, "rewards_train/margins_1": 3.155555009841919, "rewards_train/margins_2": 3.321662664413452, "step": 255 }, { "epoch": 0.76, "logps_train/policy_1_2": -187.27027893066406, "logps_train/policy_1_l": -147.06993103027344, "logps_train/policy_1_w": -113.83351135253906, "logps_train/policy_2_2": -121.4356918334961, "logps_train/policy_2_w": -168.7601776123047, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.1551523208618164, "rewards_train/1-l": -1.4275000095367432, "rewards_train/1-w": 2.4424307346343994, "rewards_train/2-2": 2.2908060550689697, "rewards_train/2-w": -1.036173701286316, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8699307441711426, "rewards_train/margins_1": 3.597583055496216, "rewards_train/margins_2": 3.3269797563552856, "step": 255 }, { "epoch": 0.77, "learning_rate": 3.641573093484283e-06, "loss": 1.093, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -186.3978271484375, "logps_train/policy_1_l": -204.38067626953125, "logps_train/policy_1_w": -122.80816650390625, "logps_train/policy_2_2": -134.955078125, "logps_train/policy_2_w": -185.57496643066406, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.61898273229599, "rewards_train/1-l": -2.8328909873962402, "rewards_train/1-w": 2.34379243850708, "rewards_train/2-2": 2.523358106613159, "rewards_train/2-w": -1.3766376972198486, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.17668342590332, "rewards_train/margins_1": 2.96277517080307, "rewards_train/margins_2": 3.899995803833008, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -192.37664794921875, "logps_train/policy_1_l": -214.83074951171875, "logps_train/policy_1_w": -132.58474731445312, "logps_train/policy_2_2": -136.4664306640625, "logps_train/policy_2_w": -200.27865600585938, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.858367383480072, "rewards_train/1-l": -2.934638500213623, "rewards_train/1-w": 2.8938698768615723, "rewards_train/2-2": 2.38167667388916, "rewards_train/2-w": -1.4630227088928223, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.828508377075195, "rewards_train/margins_1": 3.7522372603416443, "rewards_train/margins_2": 3.8446993827819824, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -159.88705444335938, "logps_train/policy_1_l": -173.16549682617188, "logps_train/policy_1_w": -110.2270278930664, "logps_train/policy_2_2": -112.53579711914062, "logps_train/policy_2_w": -168.8568878173828, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.885189414024353, "rewards_train/1-l": -1.6852505207061768, "rewards_train/1-w": 2.145071029663086, "rewards_train/2-2": 1.9440772533416748, "rewards_train/2-w": -0.8860787749290466, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8303215503692627, "rewards_train/margins_1": 3.030260443687439, "rewards_train/margins_2": 2.8301560282707214, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -218.90493774414062, "logps_train/policy_1_l": -195.33203125, "logps_train/policy_1_w": -164.0323486328125, "logps_train/policy_2_2": -158.0926055908203, "logps_train/policy_2_w": -236.23251342773438, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -0.11783777177333832, "rewards_train/1-l": -2.1450839042663574, "rewards_train/1-w": 3.731856107711792, "rewards_train/2-2": 3.359489679336548, "rewards_train/2-w": -0.7280117273330688, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.876940011978149, "rewards_train/margins_1": 3.8496938794851303, "rewards_train/margins_2": 4.087501406669617, "step": 256 }, { "epoch": 0.77, "logps_train/policy_1_2": -157.8612518310547, "logps_train/policy_1_l": -164.68858337402344, "logps_train/policy_1_w": -172.0358428955078, "logps_train/policy_2_2": -117.86028289794922, "logps_train/policy_2_w": -248.60423278808594, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": -0.29745274782180786, "rewards_train/1-l": -1.4990102052688599, "rewards_train/1-w": 3.032060146331787, "rewards_train/2-2": 1.878814697265625, "rewards_train/2-w": -1.6573482751846313, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.531070351600647, "rewards_train/margins_1": 3.329512894153595, "rewards_train/margins_2": 3.5361629724502563, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -197.09613037109375, "logps_train/policy_1_l": -124.5038833618164, "logps_train/policy_1_w": -134.21774291992188, "logps_train/policy_2_2": -144.29116821289062, "logps_train/policy_2_w": -190.7205047607422, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.0439884662628174, "rewards_train/1-l": -1.0745092630386353, "rewards_train/1-w": 2.7518577575683594, "rewards_train/2-2": 2.1615071296691895, "rewards_train/2-w": -0.6911909580230713, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8263670206069946, "rewards_train/margins_1": 3.7958462238311768, "rewards_train/margins_2": 2.8526980876922607, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -223.096923828125, "logps_train/policy_1_l": -238.52322387695312, "logps_train/policy_1_w": -163.6529083251953, "logps_train/policy_2_2": -149.97889709472656, "logps_train/policy_2_w": -256.89739990234375, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": -1.7374264001846313, "rewards_train/1-l": -3.066000461578369, "rewards_train/1-w": 3.6581473350524902, "rewards_train/2-2": 2.5523054599761963, "rewards_train/2-w": -2.3545846939086914, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.724147796630859, "rewards_train/margins_1": 5.395573735237122, "rewards_train/margins_2": 4.906890153884888, "step": 257 }, { "epoch": 0.77, "logps_train/policy_1_2": -196.42465209960938, "logps_train/policy_1_l": -177.45086669921875, "logps_train/policy_1_w": -138.74481201171875, "logps_train/policy_2_2": -138.1254119873047, "logps_train/policy_2_w": -198.09254455566406, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -0.7178571224212646, "rewards_train/1-l": -1.8093196153640747, "rewards_train/1-w": 2.4360663890838623, "rewards_train/2-2": 2.6940999031066895, "rewards_train/2-w": -1.1073988676071167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.245386004447937, "rewards_train/margins_1": 3.153923511505127, "rewards_train/margins_2": 3.801498770713806, "step": 257 }, { "epoch": 0.77, "learning_rate": 3.6195449429751585e-06, "loss": 0.9079, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -185.58596801757812, "logps_train/policy_1_l": -126.92266845703125, "logps_train/policy_1_w": -123.00251770019531, "logps_train/policy_2_2": -114.11622619628906, "logps_train/policy_2_w": -193.46646118164062, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.683009147644043, "rewards_train/1-l": -1.0692932605743408, "rewards_train/1-w": 2.2563889026641846, "rewards_train/2-2": 2.7241196632385254, "rewards_train/2-w": -2.2906880378723145, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.3256821632385254, "rewards_train/margins_1": 3.9393980503082275, "rewards_train/margins_2": 5.01480770111084, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -224.68740844726562, "logps_train/policy_1_l": -225.60275268554688, "logps_train/policy_1_w": -138.94943237304688, "logps_train/policy_2_2": -158.05770874023438, "logps_train/policy_2_w": -196.78585815429688, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -1.502920389175415, "rewards_train/1-l": -2.527071952819824, "rewards_train/1-w": 2.7330846786499023, "rewards_train/2-2": 2.3803610801696777, "rewards_train/2-w": -0.7962120175361633, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.260156631469727, "rewards_train/margins_1": 4.236005067825317, "rewards_train/margins_2": 3.176573097705841, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -216.76446533203125, "logps_train/policy_1_l": -186.70675659179688, "logps_train/policy_1_w": -135.677978515625, "logps_train/policy_2_2": -131.07928466796875, "logps_train/policy_2_w": -217.31427001953125, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -2.03621244430542, "rewards_train/1-l": -2.67262864112854, "rewards_train/1-w": 2.5909910202026367, "rewards_train/2-2": 2.8522276878356934, "rewards_train/2-w": -2.432990789413452, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.263619661331177, "rewards_train/margins_1": 4.627203464508057, "rewards_train/margins_2": 5.2852184772491455, "step": 258 }, { "epoch": 0.77, "logps_train/policy_1_2": -147.23521423339844, "logps_train/policy_1_l": -118.32337188720703, "logps_train/policy_1_w": -115.78443145751953, "logps_train/policy_2_2": -96.60623168945312, "logps_train/policy_2_w": -174.48023986816406, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.208775281906128, "rewards_train/1-l": -1.1209120750427246, "rewards_train/1-w": 2.398118734359741, "rewards_train/2-2": 2.021383762359619, "rewards_train/2-w": -1.3738059997558594, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.519030809402466, "rewards_train/margins_1": 3.606894016265869, "rewards_train/margins_2": 3.3951897621154785, "step": 258 }, { "epoch": 0.78, "logps_train/policy_1_2": -129.719970703125, "logps_train/policy_1_l": -113.29051208496094, "logps_train/policy_1_w": -89.79136657714844, "logps_train/policy_2_2": -82.58292388916016, "logps_train/policy_2_w": -158.25970458984375, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -1.1493399143218994, "rewards_train/1-l": -1.374748706817627, "rewards_train/1-w": 1.9728164672851562, "rewards_train/2-2": 1.5850672721862793, "rewards_train/2-w": -1.946868658065796, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.347565174102783, "rewards_train/margins_1": 3.1221563816070557, "rewards_train/margins_2": 3.531935930252075, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -147.19801330566406, "logps_train/policy_1_l": -117.75637817382812, "logps_train/policy_1_w": -96.49937438964844, "logps_train/policy_2_2": -96.69009399414062, "logps_train/policy_2_w": -141.22540283203125, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -0.9580820202827454, "rewards_train/1-l": -1.7719264030456543, "rewards_train/1-w": 1.3839490413665771, "rewards_train/2-2": 2.1854825019836426, "rewards_train/2-w": -1.3620915412902832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.1558754444122314, "rewards_train/margins_1": 2.3420310616493225, "rewards_train/margins_2": 3.547574043273926, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -144.96554565429688, "logps_train/policy_1_l": -125.5438003540039, "logps_train/policy_1_w": -135.5438232421875, "logps_train/policy_2_2": -99.98886108398438, "logps_train/policy_2_w": -199.0728759765625, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -0.6004599332809448, "rewards_train/1-l": -1.3961775302886963, "rewards_train/1-w": 1.9128057956695557, "rewards_train/2-2": 1.9719630479812622, "rewards_train/2-w": -1.8236955404281616, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.308983325958252, "rewards_train/margins_1": 2.5132657289505005, "rewards_train/margins_2": 3.795658588409424, "step": 259 }, { "epoch": 0.78, "logps_train/policy_1_2": -206.8334503173828, "logps_train/policy_1_l": -157.2413330078125, "logps_train/policy_1_w": -139.31314086914062, "logps_train/policy_2_2": -146.7345428466797, "logps_train/policy_2_w": -198.3654022216797, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.003657341003418, "rewards_train/1-l": -1.7192981243133545, "rewards_train/1-w": 2.739388942718506, "rewards_train/2-2": 2.7447590827941895, "rewards_train/2-w": -0.8205252885818481, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.45868706703186, "rewards_train/margins_1": 3.743046283721924, "rewards_train/margins_2": 3.5652843713760376, "step": 259 }, { "epoch": 0.78, "learning_rate": 3.5974075268532354e-06, "loss": 1.1474, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -174.19052124023438, "logps_train/policy_1_l": -255.24977111816406, "logps_train/policy_1_w": -138.45770263671875, "logps_train/policy_2_2": -119.03422546386719, "logps_train/policy_2_w": -214.83892822265625, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -0.9493728876113892, "rewards_train/1-l": -3.2297136783599854, "rewards_train/1-w": 2.690168619155884, "rewards_train/2-2": 2.4945268630981445, "rewards_train/2-w": -1.7745182514190674, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.919882297515869, "rewards_train/margins_1": 3.639541506767273, "rewards_train/margins_2": 4.269045114517212, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -169.42889404296875, "logps_train/policy_1_l": -158.04885864257812, "logps_train/policy_1_w": -86.12847900390625, "logps_train/policy_2_2": -116.18000030517578, "logps_train/policy_2_w": -125.07788848876953, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": -1.0678884983062744, "rewards_train/1-l": -1.968412160873413, "rewards_train/1-w": 2.017815589904785, "rewards_train/2-2": 2.235515832901001, "rewards_train/2-w": -0.6648200154304504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9862277507781982, "rewards_train/margins_1": 3.0857040882110596, "rewards_train/margins_2": 2.9003358483314514, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -146.36935424804688, "logps_train/policy_1_l": -180.61309814453125, "logps_train/policy_1_w": -149.5230255126953, "logps_train/policy_2_2": -98.87139129638672, "logps_train/policy_2_w": -207.48382568359375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -0.6478735208511353, "rewards_train/1-l": -1.7499821186065674, "rewards_train/1-w": 2.6032638549804688, "rewards_train/2-2": 2.0936226844787598, "rewards_train/2-w": -1.2972102165222168, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.353245973587036, "rewards_train/margins_1": 3.251137375831604, "rewards_train/margins_2": 3.3908329010009766, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -164.151611328125, "logps_train/policy_1_l": -144.22726440429688, "logps_train/policy_1_w": -109.07623291015625, "logps_train/policy_2_2": -110.67512512207031, "logps_train/policy_2_w": -156.7191925048828, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.8987559080123901, "rewards_train/1-l": -1.4696983098983765, "rewards_train/1-w": 1.4255802631378174, "rewards_train/2-2": 2.0844404697418213, "rewards_train/2-w": -1.2445745468139648, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.895278573036194, "rewards_train/margins_1": 2.3243361711502075, "rewards_train/margins_2": 3.329015016555786, "step": 260 }, { "epoch": 0.78, "logps_train/policy_1_2": -204.88653564453125, "logps_train/policy_1_l": -159.18614196777344, "logps_train/policy_1_w": -147.49984741210938, "logps_train/policy_2_2": -140.15328979492188, "logps_train/policy_2_w": -212.54470825195312, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.194122076034546, "rewards_train/1-l": -1.1140005588531494, "rewards_train/1-w": 2.712710380554199, "rewards_train/2-2": 2.755765438079834, "rewards_train/2-w": -1.2521262168884277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.8267109394073486, "rewards_train/margins_1": 3.906832456588745, "rewards_train/margins_2": 4.007891654968262, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -148.08563232421875, "logps_train/policy_1_l": -117.2015151977539, "logps_train/policy_1_w": -113.43150329589844, "logps_train/policy_2_2": -99.72125244140625, "logps_train/policy_2_w": -164.10687255859375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.8146166801452637, "rewards_train/1-l": -1.176304578781128, "rewards_train/1-w": 2.5150532722473145, "rewards_train/2-2": 2.3065850734710693, "rewards_train/2-w": -0.7059991955757141, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6913578510284424, "rewards_train/margins_1": 3.329669952392578, "rewards_train/margins_2": 3.0125842690467834, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -137.36581420898438, "logps_train/policy_1_l": -132.384521484375, "logps_train/policy_1_w": -105.65629577636719, "logps_train/policy_2_2": -96.97332000732422, "logps_train/policy_2_w": -157.85804748535156, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.4717390835285187, "rewards_train/1-l": -1.6172118186950684, "rewards_train/1-w": 2.4281203746795654, "rewards_train/2-2": 1.7604806423187256, "rewards_train/2-w": -1.1686179637908936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.045332193374634, "rewards_train/margins_1": 2.899859458208084, "rewards_train/margins_2": 2.929098606109619, "step": 261 }, { "epoch": 0.78, "logps_train/policy_1_2": -156.6941680908203, "logps_train/policy_1_l": -191.44065856933594, "logps_train/policy_1_w": -111.24371337890625, "logps_train/policy_2_2": -114.158203125, "logps_train/policy_2_w": -161.23605346679688, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -0.32800978422164917, "rewards_train/1-l": -1.7329819202423096, "rewards_train/1-w": 1.8661068677902222, "rewards_train/2-2": 2.118164539337158, "rewards_train/2-w": -0.8325894474983215, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.5990887880325317, "rewards_train/margins_1": 2.1941166520118713, "rewards_train/margins_2": 2.9507539868354797, "step": 261 }, { "epoch": 0.78, "learning_rate": 3.5751630056913017e-06, "loss": 0.875, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -241.9620819091797, "logps_train/policy_1_l": -186.66253662109375, "logps_train/policy_1_w": -144.42294311523438, "logps_train/policy_2_2": -162.44737243652344, "logps_train/policy_2_w": -212.2344512939453, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.8469902276992798, "rewards_train/1-l": -1.91488778591156, "rewards_train/1-w": 2.5759687423706055, "rewards_train/2-2": 3.2265515327453613, "rewards_train/2-w": -1.2631909847259521, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.4908565282821655, "rewards_train/margins_1": 4.422958970069885, "rewards_train/margins_2": 4.4897425174713135, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -204.88839721679688, "logps_train/policy_1_l": -198.97085571289062, "logps_train/policy_1_w": -132.70301818847656, "logps_train/policy_2_2": -149.1385498046875, "logps_train/policy_2_w": -187.75083923339844, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.0384478569030762, "rewards_train/1-l": -1.9124188423156738, "rewards_train/1-w": 2.524034023284912, "rewards_train/2-2": 2.505969524383545, "rewards_train/2-w": -0.8778183460235596, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.436452865600586, "rewards_train/margins_1": 3.5624818801879883, "rewards_train/margins_2": 3.3837878704071045, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -164.125244140625, "logps_train/policy_1_l": -156.4417266845703, "logps_train/policy_1_w": -125.61736297607422, "logps_train/policy_2_2": -100.91156005859375, "logps_train/policy_2_w": -188.66696166992188, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -2.111377716064453, "rewards_train/1-l": -1.5889968872070312, "rewards_train/1-w": 2.7784981727600098, "rewards_train/2-2": 2.104424476623535, "rewards_train/2-w": -0.9987279176712036, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.367495059967041, "rewards_train/margins_1": 4.889875888824463, "rewards_train/margins_2": 3.1031523942947388, "step": 262 }, { "epoch": 0.78, "logps_train/policy_1_2": -161.73294067382812, "logps_train/policy_1_l": -190.55555725097656, "logps_train/policy_1_w": -120.61830139160156, "logps_train/policy_2_2": -103.92689514160156, "logps_train/policy_2_w": -196.3664093017578, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.42368483543396, "rewards_train/1-l": -2.572523593902588, "rewards_train/1-w": 2.2129745483398438, "rewards_train/2-2": 2.1301627159118652, "rewards_train/2-w": -2.672773838043213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.785498142242432, "rewards_train/margins_1": 3.6366593837738037, "rewards_train/margins_2": 4.802936553955078, "step": 262 }, { "epoch": 0.79, "logps_train/policy_1_2": -256.7677001953125, "logps_train/policy_1_l": -183.53793334960938, "logps_train/policy_1_w": -167.45655822753906, "logps_train/policy_2_2": -188.91009521484375, "logps_train/policy_2_w": -232.87652587890625, "logps_train/ref_1_2": -245.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.1459099054336548, "rewards_train/1-l": -1.6873877048492432, "rewards_train/1-w": 2.3333487510681152, "rewards_train/2-2": 3.10762357711792, "rewards_train/2-w": -1.536577820777893, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.020736455917358, "rewards_train/margins_1": 3.47925865650177, "rewards_train/margins_2": 4.644201397895813, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -193.63934326171875, "logps_train/policy_1_l": -239.83261108398438, "logps_train/policy_1_w": -138.7330322265625, "logps_train/policy_2_2": -127.56844329833984, "logps_train/policy_2_w": -212.21713256835938, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.4965527057647705, "rewards_train/1-l": -2.200742244720459, "rewards_train/1-w": 2.739588737487793, "rewards_train/2-2": 2.4865152835845947, "rewards_train/2-w": -1.2629234790802002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.940330982208252, "rewards_train/margins_1": 4.2361414432525635, "rewards_train/margins_2": 3.749438762664795, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -208.38137817382812, "logps_train/policy_1_l": -201.44036865234375, "logps_train/policy_1_w": -158.68385314941406, "logps_train/policy_2_2": -147.7969512939453, "logps_train/policy_2_w": -235.4955596923828, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.121342420578003, "rewards_train/1-l": -2.34110689163208, "rewards_train/1-w": 2.788694381713867, "rewards_train/2-2": 2.685930013656616, "rewards_train/2-w": -2.099555492401123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.129801273345947, "rewards_train/margins_1": 3.91003680229187, "rewards_train/margins_2": 4.785485506057739, "step": 263 }, { "epoch": 0.79, "logps_train/policy_1_2": -192.06204223632812, "logps_train/policy_1_l": -161.5947265625, "logps_train/policy_1_w": -139.53045654296875, "logps_train/policy_2_2": -120.3810806274414, "logps_train/policy_2_w": -210.49771118164062, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -1.9925330877304077, "rewards_train/1-l": -1.597949743270874, "rewards_train/1-w": 2.33836030960083, "rewards_train/2-2": 2.3115017414093018, "rewards_train/2-w": -1.7880524396896362, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.936310052871704, "rewards_train/margins_1": 4.330893397331238, "rewards_train/margins_2": 4.099554181098938, "step": 263 }, { "epoch": 0.79, "learning_rate": 3.552813550515408e-06, "loss": 0.8754, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -132.22372436523438, "logps_train/policy_1_l": -127.75074005126953, "logps_train/policy_1_w": -83.42656707763672, "logps_train/policy_2_2": -77.9873046875, "logps_train/policy_2_w": -149.2439727783203, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": -1.7231050729751587, "rewards_train/1-l": -2.392603874206543, "rewards_train/1-w": 2.1979684829711914, "rewards_train/2-2": 1.490600347518921, "rewards_train/2-w": -2.0599446296691895, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.590572357177734, "rewards_train/margins_1": 3.92107355594635, "rewards_train/margins_2": 3.5505449771881104, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -229.5999298095703, "logps_train/policy_1_l": -181.84930419921875, "logps_train/policy_1_w": -156.2289581298828, "logps_train/policy_2_2": -158.9661865234375, "logps_train/policy_2_w": -219.99917602539062, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -1.4810866117477417, "rewards_train/1-l": -1.9834398031234741, "rewards_train/1-w": 3.234673023223877, "rewards_train/2-2": 3.1869759559631348, "rewards_train/2-w": -0.671011745929718, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.218112826347351, "rewards_train/margins_1": 4.715759634971619, "rewards_train/margins_2": 3.857987701892853, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -209.7202911376953, "logps_train/policy_1_l": -172.13397216796875, "logps_train/policy_1_w": -147.21803283691406, "logps_train/policy_2_2": -150.82138061523438, "logps_train/policy_2_w": -211.38160705566406, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.3579665422439575, "rewards_train/1-l": -1.3834168910980225, "rewards_train/1-w": 2.77175235748291, "rewards_train/2-2": 2.298330307006836, "rewards_train/2-w": -1.175074577331543, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.155169248580933, "rewards_train/margins_1": 4.129718899726868, "rewards_train/margins_2": 3.473404884338379, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -182.36817932128906, "logps_train/policy_1_l": -169.75997924804688, "logps_train/policy_1_w": -122.86367797851562, "logps_train/policy_2_2": -128.59970092773438, "logps_train/policy_2_w": -176.0295867919922, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.2743169069290161, "rewards_train/1-l": -2.3050501346588135, "rewards_train/1-w": 1.9456634521484375, "rewards_train/2-2": 2.1048741340637207, "rewards_train/2-w": -1.3767871856689453, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.250713586807251, "rewards_train/margins_1": 3.2199803590774536, "rewards_train/margins_2": 3.481661319732666, "step": 264 }, { "epoch": 0.79, "logps_train/policy_1_2": -171.83392333984375, "logps_train/policy_1_l": -191.55967712402344, "logps_train/policy_1_w": -128.8407440185547, "logps_train/policy_2_2": -103.76622772216797, "logps_train/policy_2_w": -222.100341796875, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.8681575059890747, "rewards_train/1-l": -2.6699821949005127, "rewards_train/1-w": 2.767096996307373, "rewards_train/2-2": 2.1307992935180664, "rewards_train/2-w": -2.860034465789795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.437079191207886, "rewards_train/margins_1": 4.635254502296448, "rewards_train/margins_2": 4.990833759307861, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -174.1944580078125, "logps_train/policy_1_l": -144.5736083984375, "logps_train/policy_1_w": -90.09391784667969, "logps_train/policy_2_2": -122.68077087402344, "logps_train/policy_2_w": -138.41607666015625, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -1.5454223155975342, "rewards_train/1-l": -1.6144906282424927, "rewards_train/1-w": 1.7380692958831787, "rewards_train/2-2": 1.672938585281372, "rewards_train/2-w": -1.2513725757598877, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.3525599241256714, "rewards_train/margins_1": 3.283491611480713, "rewards_train/margins_2": 2.9243111610412598, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -163.3689422607422, "logps_train/policy_1_l": -197.8916778564453, "logps_train/policy_1_w": -98.63143157958984, "logps_train/policy_2_2": -117.59231567382812, "logps_train/policy_2_w": -145.07110595703125, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -0.996660053730011, "rewards_train/1-l": -2.5667552947998047, "rewards_train/1-w": 1.9366618394851685, "rewards_train/2-2": 1.910495400428772, "rewards_train/2-w": -0.7557432651519775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.503417134284973, "rewards_train/margins_1": 2.9333218932151794, "rewards_train/margins_2": 2.6662386655807495, "step": 265 }, { "epoch": 0.79, "logps_train/policy_1_2": -186.75494384765625, "logps_train/policy_1_l": -169.8460235595703, "logps_train/policy_1_w": -138.20986938476562, "logps_train/policy_2_2": -130.85617065429688, "logps_train/policy_2_w": -205.9280548095703, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -1.3409254550933838, "rewards_train/1-l": -1.8614579439163208, "rewards_train/1-w": 2.6559667587280273, "rewards_train/2-2": 2.1699745655059814, "rewards_train/2-w": -1.3572577238082886, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.517424702644348, "rewards_train/margins_1": 3.996892213821411, "rewards_train/margins_2": 3.52723228931427, "step": 265 }, { "epoch": 0.8, "learning_rate": 3.5303613425929805e-06, "loss": 1.0493, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -166.14236450195312, "logps_train/policy_1_l": -160.30416870117188, "logps_train/policy_1_w": -144.03399658203125, "logps_train/policy_2_2": -116.65646362304688, "logps_train/policy_2_w": -225.30142211914062, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -1.3115026950836182, "rewards_train/1-l": -1.5394030809402466, "rewards_train/1-w": 2.720428943634033, "rewards_train/2-2": 1.5525171756744385, "rewards_train/2-w": -1.8477189540863037, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.25983202457428, "rewards_train/margins_1": 4.031931638717651, "rewards_train/margins_2": 3.400236129760742, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -172.322021484375, "logps_train/policy_1_l": -125.62263488769531, "logps_train/policy_1_w": -101.29714965820312, "logps_train/policy_2_2": -108.69932556152344, "logps_train/policy_2_w": -164.61294555664062, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.1989995241165161, "rewards_train/1-l": -1.3259844779968262, "rewards_train/1-w": 2.2481164932250977, "rewards_train/2-2": 2.457411289215088, "rewards_train/2-w": -1.8728177547454834, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.574100971221924, "rewards_train/margins_1": 3.4471160173416138, "rewards_train/margins_2": 4.330229043960571, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -208.93756103515625, "logps_train/policy_1_l": -167.35824584960938, "logps_train/policy_1_w": -132.37155151367188, "logps_train/policy_2_2": -130.31442260742188, "logps_train/policy_2_w": -202.77005004882812, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -2.5164127349853516, "rewards_train/1-l": -1.8691246509552002, "rewards_train/1-w": 1.9155791997909546, "rewards_train/2-2": 2.5607447624206543, "rewards_train/2-w": -2.6309123039245605, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.784703850746155, "rewards_train/margins_1": 4.431991934776306, "rewards_train/margins_2": 5.191657066345215, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -181.77328491210938, "logps_train/policy_1_l": -155.15106201171875, "logps_train/policy_1_w": -113.77507781982422, "logps_train/policy_2_2": -124.9219741821289, "logps_train/policy_2_w": -169.44882202148438, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.912877082824707, "rewards_train/1-l": -1.282782793045044, "rewards_train/1-w": 1.907794713973999, "rewards_train/2-2": 1.4471584558486938, "rewards_train/2-w": -1.6346293687820435, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.190577507019043, "rewards_train/margins_1": 3.820671796798706, "rewards_train/margins_2": 3.0817878246307373, "step": 266 }, { "epoch": 0.8, "logps_train/policy_1_2": -206.80364990234375, "logps_train/policy_1_l": -172.90567016601562, "logps_train/policy_1_w": -132.2621307373047, "logps_train/policy_2_2": -149.5521240234375, "logps_train/policy_2_w": -196.96884155273438, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.3647384643554688, "rewards_train/1-l": -1.7123429775238037, "rewards_train/1-w": 2.0847244262695312, "rewards_train/2-2": 2.2201786041259766, "rewards_train/2-w": -1.5281339883804321, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.797067403793335, "rewards_train/margins_1": 3.449462890625, "rewards_train/margins_2": 3.7483125925064087, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -182.18438720703125, "logps_train/policy_1_l": -149.83731079101562, "logps_train/policy_1_w": -110.80559539794922, "logps_train/policy_2_2": -116.52112579345703, "logps_train/policy_2_w": -161.859375, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.591095209121704, "rewards_train/1-l": -2.1095125675201416, "rewards_train/1-w": 1.7340890169143677, "rewards_train/2-2": 2.4533557891845703, "rewards_train/2-w": -1.5220706462860107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8436015844345093, "rewards_train/margins_1": 3.3251842260360718, "rewards_train/margins_2": 3.975426435470581, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -152.93310546875, "logps_train/policy_1_l": -138.2782745361328, "logps_train/policy_1_w": -96.07038879394531, "logps_train/policy_2_2": -111.9058837890625, "logps_train/policy_2_w": -151.12916564941406, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.46518510580062866, "rewards_train/1-l": -1.9690392017364502, "rewards_train/1-w": 2.073430061340332, "rewards_train/2-2": 1.707068920135498, "rewards_train/2-w": -1.0822514295578003, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.042469263076782, "rewards_train/margins_1": 2.5386151671409607, "rewards_train/margins_2": 2.7893203496932983, "step": 267 }, { "epoch": 0.8, "logps_train/policy_1_2": -258.6045837402344, "logps_train/policy_1_l": -175.0790557861328, "logps_train/policy_1_w": -136.064697265625, "logps_train/policy_2_2": -170.54083251953125, "logps_train/policy_2_w": -204.48736572265625, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -205.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -2.406160831451416, "rewards_train/1-l": -1.8222613334655762, "rewards_train/1-w": 2.1080808639526367, "rewards_train/2-2": 3.449824333190918, "rewards_train/2-w": -1.9708551168441772, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.930342197418213, "rewards_train/margins_1": 4.514241695404053, "rewards_train/margins_2": 5.420679450035095, "step": 267 }, { "epoch": 0.8, "learning_rate": 3.5078085732199314e-06, "loss": 0.9227, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -212.94955444335938, "logps_train/policy_1_l": -179.04208374023438, "logps_train/policy_1_w": -123.16072082519531, "logps_train/policy_2_2": -141.38360595703125, "logps_train/policy_2_w": -193.04965209960938, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -2.309019088745117, "rewards_train/1-l": -1.548445224761963, "rewards_train/1-w": 1.7933018207550049, "rewards_train/2-2": 2.129997730255127, "rewards_train/2-w": -1.9284026622772217, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.3417470455169678, "rewards_train/margins_1": 4.102320909500122, "rewards_train/margins_2": 4.058400392532349, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -160.6303253173828, "logps_train/policy_1_l": -168.69314575195312, "logps_train/policy_1_w": -115.78475952148438, "logps_train/policy_2_2": -116.42115020751953, "logps_train/policy_2_w": -167.23623657226562, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.9628373980522156, "rewards_train/1-l": -1.8690204620361328, "rewards_train/1-w": 1.9640532732009888, "rewards_train/2-2": 2.0194082260131836, "rewards_train/2-w": -0.8880769610404968, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.8330737352371216, "rewards_train/margins_1": 2.9268906712532043, "rewards_train/margins_2": 2.9074851870536804, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -164.6439208984375, "logps_train/policy_1_l": -179.66384887695312, "logps_train/policy_1_w": -127.40704345703125, "logps_train/policy_2_2": -105.22830200195312, "logps_train/policy_2_w": -197.0308074951172, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.1847045421600342, "rewards_train/1-l": -1.9719516038894653, "rewards_train/1-w": 2.31046724319458, "rewards_train/2-2": 2.248263359069824, "rewards_train/2-w": -2.1886277198791504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.282418847084045, "rewards_train/margins_1": 3.4951717853546143, "rewards_train/margins_2": 4.436891078948975, "step": 268 }, { "epoch": 0.8, "logps_train/policy_1_2": -244.7774200439453, "logps_train/policy_1_l": -259.8769226074219, "logps_train/policy_1_w": -185.25306701660156, "logps_train/policy_2_2": -158.95211791992188, "logps_train/policy_2_w": -266.5245361328125, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": -2.0957117080688477, "rewards_train/1-l": -2.7562954425811768, "rewards_train/1-w": 2.443444013595581, "rewards_train/2-2": 3.3799848556518555, "rewards_train/2-w": -2.185265064239502, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.199739456176758, "rewards_train/margins_1": 4.539155721664429, "rewards_train/margins_2": 5.565249919891357, "step": 268 }, { "epoch": 0.81, "logps_train/policy_1_2": -190.88787841796875, "logps_train/policy_1_l": -148.37173461914062, "logps_train/policy_1_w": -128.6312255859375, "logps_train/policy_2_2": -134.68235778808594, "logps_train/policy_2_w": -192.68885803222656, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.7622238397598267, "rewards_train/1-l": -1.7066062688827515, "rewards_train/1-w": 2.1509411334991455, "rewards_train/2-2": 2.6919209957122803, "rewards_train/2-w": -1.7290420532226562, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.857547402381897, "rewards_train/margins_1": 2.913164973258972, "rewards_train/margins_2": 4.4209630489349365, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -164.62625122070312, "logps_train/policy_1_l": -167.6203155517578, "logps_train/policy_1_w": -146.44285583496094, "logps_train/policy_2_2": -112.33953857421875, "logps_train/policy_2_w": -210.4158935546875, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -0.7356718182563782, "rewards_train/1-l": -1.3383979797363281, "rewards_train/1-w": 2.6358895301818848, "rewards_train/2-2": 2.0445613861083984, "rewards_train/2-w": -1.2997195720672607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.974287509918213, "rewards_train/margins_1": 3.371561348438263, "rewards_train/margins_2": 3.344280958175659, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -121.95850372314453, "logps_train/policy_1_l": -127.59660339355469, "logps_train/policy_1_w": -104.1777572631836, "logps_train/policy_2_2": -78.2689208984375, "logps_train/policy_2_w": -160.58668518066406, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -93.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.312648057937622, "rewards_train/1-l": -1.5382001399993896, "rewards_train/1-w": 1.8295629024505615, "rewards_train/2-2": 1.5283818244934082, "rewards_train/2-w": -1.664625644683838, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.367763042449951, "rewards_train/margins_1": 3.1422109603881836, "rewards_train/margins_2": 3.193007469177246, "step": 269 }, { "epoch": 0.81, "logps_train/policy_1_2": -134.44947814941406, "logps_train/policy_1_l": -167.78692626953125, "logps_train/policy_1_w": -107.5566635131836, "logps_train/policy_2_2": -101.31751251220703, "logps_train/policy_2_w": -145.31741333007812, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.3518814146518707, "rewards_train/1-l": -1.445880651473999, "rewards_train/1-w": 2.100389003753662, "rewards_train/2-2": 1.7029168605804443, "rewards_train/2-w": -0.3836948275566101, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.546269655227661, "rewards_train/margins_1": 2.452270418405533, "rewards_train/margins_2": 2.0866116881370544, "step": 269 }, { "epoch": 0.81, "learning_rate": 3.4851574435067925e-06, "loss": 0.9648, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -145.6959228515625, "logps_train/policy_1_l": -133.14186096191406, "logps_train/policy_1_w": -106.12080383300781, "logps_train/policy_2_2": -108.79021453857422, "logps_train/policy_2_w": -156.6600799560547, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.6477174758911133, "rewards_train/1-l": -1.3607677221298218, "rewards_train/1-w": 1.5242481231689453, "rewards_train/2-2": 1.4340646266937256, "rewards_train/2-w": -1.3113205432891846, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.885015845298767, "rewards_train/margins_1": 2.1719655990600586, "rewards_train/margins_2": 2.74538516998291, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -257.3570556640625, "logps_train/policy_1_l": -218.6708984375, "logps_train/policy_1_w": -145.97964477539062, "logps_train/policy_2_2": -160.93496704101562, "logps_train/policy_2_w": -233.44554138183594, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -3.1435182094573975, "rewards_train/1-l": -2.400294780731201, "rewards_train/1-w": 2.905552387237549, "rewards_train/2-2": 3.489314556121826, "rewards_train/2-w": -2.7062721252441406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.30584716796875, "rewards_train/margins_1": 6.049070596694946, "rewards_train/margins_2": 6.195586681365967, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -161.16873168945312, "logps_train/policy_1_l": -212.3677978515625, "logps_train/policy_1_w": -135.15988159179688, "logps_train/policy_2_2": -108.58480834960938, "logps_train/policy_2_w": -197.35623168945312, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.2777130603790283, "rewards_train/1-l": -2.529942750930786, "rewards_train/1-w": 2.529714345932007, "rewards_train/2-2": 1.8110504150390625, "rewards_train/2-w": -1.3059369325637817, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.059657096862793, "rewards_train/margins_1": 3.807427406311035, "rewards_train/margins_2": 3.1169873476028442, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -209.55963134765625, "logps_train/policy_1_l": -177.1941680908203, "logps_train/policy_1_w": -151.728271484375, "logps_train/policy_2_2": -140.75975036621094, "logps_train/policy_2_w": -229.16659545898438, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -2.1239326000213623, "rewards_train/1-l": -1.9422680139541626, "rewards_train/1-w": 2.68459415435791, "rewards_train/2-2": 2.275588035583496, "rewards_train/2-w": -1.7830660343170166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.626862168312073, "rewards_train/margins_1": 4.8085267543792725, "rewards_train/margins_2": 4.058654069900513, "step": 270 }, { "epoch": 0.81, "logps_train/policy_1_2": -245.40817260742188, "logps_train/policy_1_l": -258.6728515625, "logps_train/policy_1_w": -160.73902893066406, "logps_train/policy_2_2": -170.3602752685547, "logps_train/policy_2_w": -235.8930206298828, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.382322072982788, "rewards_train/1-l": -2.2710423469543457, "rewards_train/1-w": 2.8788323402404785, "rewards_train/2-2": 3.004694938659668, "rewards_train/2-w": -1.7764122486114502, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.149874687194824, "rewards_train/margins_1": 4.261154413223267, "rewards_train/margins_2": 4.781107187271118, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -150.1944580078125, "logps_train/policy_1_l": -120.01619720458984, "logps_train/policy_1_w": -110.01313781738281, "logps_train/policy_2_2": -108.911865234375, "logps_train/policy_2_w": -157.32476806640625, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -107.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.6597781777381897, "rewards_train/1-l": -1.264852523803711, "rewards_train/1-w": 2.079838514328003, "rewards_train/2-2": 1.8137199878692627, "rewards_train/2-w": -0.8651684522628784, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.344691038131714, "rewards_train/margins_1": 2.7396166920661926, "rewards_train/margins_2": 2.678888440132141, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -103.80110931396484, "logps_train/policy_1_l": -133.25137329101562, "logps_train/policy_1_w": -145.95663452148438, "logps_train/policy_2_2": -63.66541290283203, "logps_train/policy_2_w": -205.153564453125, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": 0.03658872842788696, "rewards_train/1-l": -0.827970027923584, "rewards_train/1-w": 1.8371493816375732, "rewards_train/2-2": 2.1926751136779785, "rewards_train/2-w": -2.3505125045776367, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.6651194095611572, "rewards_train/margins_1": 1.8005606532096863, "rewards_train/margins_2": 4.543187618255615, "step": 271 }, { "epoch": 0.81, "logps_train/policy_1_2": -162.8227081298828, "logps_train/policy_1_l": -135.92193603515625, "logps_train/policy_1_w": -107.5687255859375, "logps_train/policy_2_2": -113.14677429199219, "logps_train/policy_2_w": -158.78977966308594, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.6549274921417236, "rewards_train/1-l": -1.0718564987182617, "rewards_train/1-w": 1.7589482069015503, "rewards_train/2-2": 2.421260118484497, "rewards_train/2-w": -1.1192123889923096, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.830804705619812, "rewards_train/margins_1": 2.413875699043274, "rewards_train/margins_2": 3.5404725074768066, "step": 271 }, { "epoch": 0.81, "learning_rate": 3.462410164163893e-06, "loss": 0.9812, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -156.08895874023438, "logps_train/policy_1_l": -155.35467529296875, "logps_train/policy_1_w": -149.5119171142578, "logps_train/policy_2_2": -100.83319091796875, "logps_train/policy_2_w": -225.521484375, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.2045987844467163, "rewards_train/1-l": -1.5560319423675537, "rewards_train/1-w": 2.1204874515533447, "rewards_train/2-2": 1.6595697402954102, "rewards_train/2-w": -2.2517566680908203, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6765193939208984, "rewards_train/margins_1": 3.325086236000061, "rewards_train/margins_2": 3.9113264083862305, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -168.91970825195312, "logps_train/policy_1_l": -176.83287048339844, "logps_train/policy_1_w": -125.59040069580078, "logps_train/policy_2_2": -123.8619613647461, "logps_train/policy_2_w": -170.32850646972656, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -0.3939230442047119, "rewards_train/1-l": -1.9689314365386963, "rewards_train/1-w": 2.2559988498687744, "rewards_train/2-2": 2.6440775394439697, "rewards_train/2-w": -0.3824600875377655, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.224930286407471, "rewards_train/margins_1": 2.6499218940734863, "rewards_train/margins_2": 3.0265376269817352, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -200.98275756835938, "logps_train/policy_1_l": -153.252685546875, "logps_train/policy_1_w": -110.26837921142578, "logps_train/policy_2_2": -142.2252197265625, "logps_train/policy_2_w": -180.34486389160156, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.4334321022033691, "rewards_train/1-l": -1.4758541584014893, "rewards_train/1-w": 2.1801939010620117, "rewards_train/2-2": 2.2095093727111816, "rewards_train/2-w": -1.7270644903182983, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.656048059463501, "rewards_train/margins_1": 3.613626003265381, "rewards_train/margins_2": 3.93657386302948, "step": 272 }, { "epoch": 0.81, "logps_train/policy_1_2": -130.4127197265625, "logps_train/policy_1_l": -114.40616607666016, "logps_train/policy_1_w": -119.81945037841797, "logps_train/policy_2_2": -77.05829620361328, "logps_train/policy_2_w": -192.19424438476562, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.2537708282470703, "rewards_train/1-l": -1.4679603576660156, "rewards_train/1-w": 2.19083309173584, "rewards_train/2-2": 1.7158503532409668, "rewards_train/2-w": -2.399111270904541, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6587934494018555, "rewards_train/margins_1": 3.44460391998291, "rewards_train/margins_2": 4.114961624145508, "step": 272 }, { "epoch": 0.82, "logps_train/policy_1_2": -174.28016662597656, "logps_train/policy_1_l": -129.867431640625, "logps_train/policy_1_w": -120.99507141113281, "logps_train/policy_2_2": -120.38455200195312, "logps_train/policy_2_w": -171.776611328125, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -0.8625870943069458, "rewards_train/1-l": -1.143761396408081, "rewards_train/1-w": 2.169048309326172, "rewards_train/2-2": 2.351583480834961, "rewards_train/2-w": -1.0624275207519531, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.312809705734253, "rewards_train/margins_1": 3.0316354036331177, "rewards_train/margins_2": 3.414011001586914, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -194.36685180664062, "logps_train/policy_1_l": -137.3983154296875, "logps_train/policy_1_w": -90.42424011230469, "logps_train/policy_2_2": -133.53457641601562, "logps_train/policy_2_w": -129.1044464111328, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": -2.0800445079803467, "rewards_train/1-l": -1.3617064952850342, "rewards_train/1-w": 1.057966947555542, "rewards_train/2-2": 1.8678309917449951, "rewards_train/2-w": -1.146772861480713, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.419673442840576, "rewards_train/margins_1": 3.1380114555358887, "rewards_train/margins_2": 3.014603853225708, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -208.87852478027344, "logps_train/policy_1_l": -246.28500366210938, "logps_train/policy_1_w": -183.1544647216797, "logps_train/policy_2_2": -155.42999267578125, "logps_train/policy_2_w": -244.25637817382812, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": -0.02925962209701538, "rewards_train/1-l": -2.100766181945801, "rewards_train/1-w": 3.0009584426879883, "rewards_train/2-2": 3.2187204360961914, "rewards_train/2-w": -1.0260276794433594, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.101724624633789, "rewards_train/margins_1": 3.0302180647850037, "rewards_train/margins_2": 4.244748115539551, "step": 273 }, { "epoch": 0.82, "logps_train/policy_1_2": -155.1287841796875, "logps_train/policy_1_l": -160.57818603515625, "logps_train/policy_1_w": -110.47489166259766, "logps_train/policy_2_2": -110.93276977539062, "logps_train/policy_2_w": -156.13388061523438, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.7460808753967285, "rewards_train/1-l": -2.1793212890625, "rewards_train/1-w": 1.844307541847229, "rewards_train/2-2": 2.14715313911438, "rewards_train/2-w": -0.9649507403373718, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.023628830909729, "rewards_train/margins_1": 2.5903884172439575, "rewards_train/margins_2": 3.1121038794517517, "step": 273 }, { "epoch": 0.82, "learning_rate": 3.4395689552855956e-06, "loss": 0.9296, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -196.20840454101562, "logps_train/policy_1_l": -207.37069702148438, "logps_train/policy_1_w": -132.5309600830078, "logps_train/policy_2_2": -137.18067932128906, "logps_train/policy_2_w": -191.92257690429688, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.1689622402191162, "rewards_train/1-l": -1.7793300151824951, "rewards_train/1-w": 2.028153896331787, "rewards_train/2-2": 2.2226061820983887, "rewards_train/2-w": -1.489133358001709, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8074839115142822, "rewards_train/margins_1": 3.1971161365509033, "rewards_train/margins_2": 3.7117395401000977, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -87.8619384765625, "logps_train/policy_1_l": -118.20521545410156, "logps_train/policy_1_w": -82.48123931884766, "logps_train/policy_2_2": -66.6900863647461, "logps_train/policy_2_w": -133.15939331054688, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": -0.05611598491668701, "rewards_train/1-l": -1.353919267654419, "rewards_train/1-w": 1.6840540170669556, "rewards_train/2-2": 1.0104835033416748, "rewards_train/2-w": -1.3583223819732666, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.0379732847213745, "rewards_train/margins_1": 1.7401700019836426, "rewards_train/margins_2": 2.3688058853149414, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -171.0606689453125, "logps_train/policy_1_l": -163.08273315429688, "logps_train/policy_1_w": -99.68864440917969, "logps_train/policy_2_2": -107.03839111328125, "logps_train/policy_2_w": -153.4492645263672, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.49493408203125, "rewards_train/1-l": -1.591085433959961, "rewards_train/1-w": 1.751008152961731, "rewards_train/2-2": 2.140692710876465, "rewards_train/2-w": -1.328618049621582, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.342093586921692, "rewards_train/margins_1": 3.245942234992981, "rewards_train/margins_2": 3.469310760498047, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -123.28404235839844, "logps_train/policy_1_l": -123.91409301757812, "logps_train/policy_1_w": -102.81950378417969, "logps_train/policy_2_2": -86.58301544189453, "logps_train/policy_2_w": -147.7244415283203, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.3485216200351715, "rewards_train/1-l": -0.8042271137237549, "rewards_train/1-w": 1.9024978876113892, "rewards_train/2-2": 1.8188471794128418, "rewards_train/2-w": -0.6430492401123047, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.706725001335144, "rewards_train/margins_1": 2.2510195076465607, "rewards_train/margins_2": 2.4618964195251465, "step": 274 }, { "epoch": 0.82, "logps_train/policy_1_2": -192.08804321289062, "logps_train/policy_1_l": -179.43646240234375, "logps_train/policy_1_w": -114.81517791748047, "logps_train/policy_2_2": -129.08700561523438, "logps_train/policy_2_w": -175.26589965820312, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.446305513381958, "rewards_train/1-l": -2.032879590988159, "rewards_train/1-w": 2.0823488235473633, "rewards_train/2-2": 2.6455960273742676, "rewards_train/2-w": -1.2840111255645752, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.1152284145355225, "rewards_train/margins_1": 3.5286543369293213, "rewards_train/margins_2": 3.9296071529388428, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -152.87893676757812, "logps_train/policy_1_l": -196.2801513671875, "logps_train/policy_1_w": -99.45918273925781, "logps_train/policy_2_2": -112.48741149902344, "logps_train/policy_2_w": -149.7057342529297, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -0.6687535643577576, "rewards_train/1-l": -2.0890376567840576, "rewards_train/1-w": 2.106534957885742, "rewards_train/2-2": 1.6929585933685303, "rewards_train/2-w": -0.7228504419326782, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1955726146698, "rewards_train/margins_1": 2.7752885222434998, "rewards_train/margins_2": 2.4158090353012085, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -202.75550842285156, "logps_train/policy_1_l": -205.53793334960938, "logps_train/policy_1_w": -161.62118530273438, "logps_train/policy_2_2": -136.75604248046875, "logps_train/policy_2_w": -241.576904296875, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": -0.9314093589782715, "rewards_train/1-l": -2.391294002532959, "rewards_train/1-w": 2.913662910461426, "rewards_train/2-2": 2.902521848678589, "rewards_train/2-w": -1.8076906204223633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.304956912994385, "rewards_train/margins_1": 3.8450722694396973, "rewards_train/margins_2": 4.710212469100952, "step": 275 }, { "epoch": 0.82, "logps_train/policy_1_2": -103.47010803222656, "logps_train/policy_1_l": -71.70004272460938, "logps_train/policy_1_w": -86.84798431396484, "logps_train/policy_2_2": -73.79598999023438, "logps_train/policy_2_w": -118.33792114257812, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -63.75, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": 0.01236414909362793, "rewards_train/1-l": -0.785629153251648, "rewards_train/1-w": 1.988931655883789, "rewards_train/2-2": 1.8207913637161255, "rewards_train/2-w": -0.09199559688568115, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.774560809135437, "rewards_train/margins_1": 1.9765675067901611, "rewards_train/margins_2": 1.9127869606018066, "step": 275 }, { "epoch": 0.83, "learning_rate": 3.416636046133621e-06, "loss": 1.0908, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -211.1529541015625, "logps_train/policy_1_l": -183.01881408691406, "logps_train/policy_1_w": -141.6649627685547, "logps_train/policy_2_2": -148.38604736328125, "logps_train/policy_2_w": -201.89334106445312, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.1059210300445557, "rewards_train/1-l": -1.4979748725891113, "rewards_train/1-w": 2.3395581245422363, "rewards_train/2-2": 3.0830259323120117, "rewards_train/2-w": -1.0842558145523071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8375329971313477, "rewards_train/margins_1": 3.445479154586792, "rewards_train/margins_2": 4.167281746864319, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -133.76258850097656, "logps_train/policy_1_l": -102.08525848388672, "logps_train/policy_1_w": -74.16971588134766, "logps_train/policy_2_2": -98.86251831054688, "logps_train/policy_2_w": -110.96196746826172, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": 0.04346795380115509, "rewards_train/1-l": -0.9065729379653931, "rewards_train/1-w": 2.139376163482666, "rewards_train/2-2": 1.9546171426773071, "rewards_train/2-w": -0.15234896540641785, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.045949101448059, "rewards_train/margins_1": 2.095908209681511, "rewards_train/margins_2": 2.106966108083725, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -217.41867065429688, "logps_train/policy_1_l": -168.83428955078125, "logps_train/policy_1_w": -129.7633056640625, "logps_train/policy_2_2": -133.10275268554688, "logps_train/policy_2_w": -197.12844848632812, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.8126686811447144, "rewards_train/1-l": -1.3804993629455566, "rewards_train/1-w": 2.0894904136657715, "rewards_train/2-2": 2.7635533809661865, "rewards_train/2-w": -1.5142133235931396, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.469989776611328, "rewards_train/margins_1": 3.902159094810486, "rewards_train/margins_2": 4.277766704559326, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -157.991455078125, "logps_train/policy_1_l": -165.00938415527344, "logps_train/policy_1_w": -105.17717742919922, "logps_train/policy_2_2": -104.70331573486328, "logps_train/policy_2_w": -160.85009765625, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -1.1452394723892212, "rewards_train/1-l": -1.627342700958252, "rewards_train/1-w": 2.1022167205810547, "rewards_train/2-2": 2.0898241996765137, "rewards_train/2-w": -1.1300294399261475, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7295594215393066, "rewards_train/margins_1": 3.247456192970276, "rewards_train/margins_2": 3.219853639602661, "step": 276 }, { "epoch": 0.83, "logps_train/policy_1_2": -163.81838989257812, "logps_train/policy_1_l": -138.35525512695312, "logps_train/policy_1_w": -106.27774810791016, "logps_train/policy_2_2": -118.86337280273438, "logps_train/policy_2_w": -153.9617919921875, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.0646517276763916, "rewards_train/1-l": -1.4290802478790283, "rewards_train/1-w": 2.299959659576416, "rewards_train/2-2": 2.195498466491699, "rewards_train/2-w": -0.7274290323257446, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7290399074554443, "rewards_train/margins_1": 3.3646113872528076, "rewards_train/margins_2": 2.922927498817444, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -259.37152099609375, "logps_train/policy_1_l": -224.43699645996094, "logps_train/policy_1_w": -133.4121551513672, "logps_train/policy_2_2": -177.8929443359375, "logps_train/policy_2_w": -203.20382690429688, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.8949673175811768, "rewards_train/1-l": -1.9458482265472412, "rewards_train/1-w": 2.880659341812134, "rewards_train/2-2": 3.1657843589782715, "rewards_train/2-w": -1.208662509918213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.826507568359375, "rewards_train/margins_1": 4.7756266593933105, "rewards_train/margins_2": 4.374446868896484, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -122.47615051269531, "logps_train/policy_1_l": -100.22372436523438, "logps_train/policy_1_w": -79.58629608154297, "logps_train/policy_2_2": -75.81465148925781, "logps_train/policy_2_w": -121.26368713378906, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": -1.1116771697998047, "rewards_train/1-l": -1.0495820045471191, "rewards_train/1-w": 1.4185190200805664, "rewards_train/2-2": 1.6923625469207764, "rewards_train/2-w": -0.8787130117416382, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.4681010246276855, "rewards_train/margins_1": 2.530196189880371, "rewards_train/margins_2": 2.5710755586624146, "step": 277 }, { "epoch": 0.83, "logps_train/policy_1_2": -202.58285522460938, "logps_train/policy_1_l": -162.38217163085938, "logps_train/policy_1_w": -147.22909545898438, "logps_train/policy_2_2": -136.075439453125, "logps_train/policy_2_w": -212.20333862304688, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.7910981178283691, "rewards_train/1-l": -1.4410479068756104, "rewards_train/1-w": 2.7356834411621094, "rewards_train/2-2": 2.3182373046875, "rewards_train/2-w": -1.1613503694534302, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.17673134803772, "rewards_train/margins_1": 4.5267815589904785, "rewards_train/margins_2": 3.47958767414093, "step": 277 }, { "epoch": 0.83, "learning_rate": 3.393613674919473e-06, "loss": 0.963, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -162.4659881591797, "logps_train/policy_1_l": -110.99711608886719, "logps_train/policy_1_w": -121.03682708740234, "logps_train/policy_2_2": -110.06112670898438, "logps_train/policy_2_w": -177.1547088623047, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -102.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.8313649296760559, "rewards_train/1-l": -0.8231975436210632, "rewards_train/1-w": 2.06428599357605, "rewards_train/2-2": 2.4460363388061523, "rewards_train/2-w": -1.1685967445373535, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.887483537197113, "rewards_train/margins_1": 2.8956509232521057, "rewards_train/margins_2": 3.614633083343506, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -230.2689666748047, "logps_train/policy_1_l": -177.2779541015625, "logps_train/policy_1_w": -147.42088317871094, "logps_train/policy_2_2": -158.54794311523438, "logps_train/policy_2_w": -212.06324768066406, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.6786038875579834, "rewards_train/1-l": -2.0929317474365234, "rewards_train/1-w": 2.642286539077759, "rewards_train/2-2": 2.943302869796753, "rewards_train/2-w": -1.1731204986572266, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.735218286514282, "rewards_train/margins_1": 4.320890426635742, "rewards_train/margins_2": 4.1164233684539795, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -211.3033447265625, "logps_train/policy_1_l": -184.12310791015625, "logps_train/policy_1_w": -156.8882598876953, "logps_train/policy_2_2": -131.76409912109375, "logps_train/policy_2_w": -249.8470001220703, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -1.9076794385910034, "rewards_train/1-l": -1.4595757722854614, "rewards_train/1-w": 2.757218599319458, "rewards_train/2-2": 2.7192938327789307, "rewards_train/2-w": -2.1524243354797363, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.216794371604919, "rewards_train/margins_1": 4.664898037910461, "rewards_train/margins_2": 4.871718168258667, "step": 278 }, { "epoch": 0.83, "logps_train/policy_1_2": -195.8914031982422, "logps_train/policy_1_l": -156.878173828125, "logps_train/policy_1_w": -121.67498016357422, "logps_train/policy_2_2": -121.74269104003906, "logps_train/policy_2_w": -187.5564422607422, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.3117973804473877, "rewards_train/1-l": -1.517797589302063, "rewards_train/1-w": 2.4375803470611572, "rewards_train/2-2": 2.9124505519866943, "rewards_train/2-w": -1.3706834316253662, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.95537793636322, "rewards_train/margins_1": 3.749377727508545, "rewards_train/margins_2": 4.2831339836120605, "step": 278 }, { "epoch": 0.84, "logps_train/policy_1_2": -186.77426147460938, "logps_train/policy_1_l": -167.38018798828125, "logps_train/policy_1_w": -130.78829956054688, "logps_train/policy_2_2": -128.08865356445312, "logps_train/policy_2_w": -192.8713836669922, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.2903165817260742, "rewards_train/1-l": -1.466240406036377, "rewards_train/1-w": 2.3927507400512695, "rewards_train/2-2": 2.209981918334961, "rewards_train/2-w": -1.463994026184082, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.8589911460876465, "rewards_train/margins_1": 3.6830673217773438, "rewards_train/margins_2": 3.673975944519043, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -149.73431396484375, "logps_train/policy_1_l": -142.39376831054688, "logps_train/policy_1_w": -73.27701568603516, "logps_train/policy_2_2": -103.64611053466797, "logps_train/policy_2_w": -113.63066101074219, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -113.5, "rewards_train/1-2": -0.47499367594718933, "rewards_train/1-l": -1.9494590759277344, "rewards_train/1-w": 2.1887049674987793, "rewards_train/2-2": 2.42288875579834, "rewards_train/2-w": -0.05525369942188263, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.138164043426514, "rewards_train/margins_1": 2.6636986434459686, "rewards_train/margins_2": 2.4781424552202225, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -237.62844848632812, "logps_train/policy_1_l": -211.31130981445312, "logps_train/policy_1_w": -150.01071166992188, "logps_train/policy_2_2": -174.7615509033203, "logps_train/policy_2_w": -215.33653259277344, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.3230001926422119, "rewards_train/1-l": -1.7865022420883179, "rewards_train/1-w": 3.2993202209472656, "rewards_train/2-2": 3.4785330295562744, "rewards_train/2-w": -0.3734968304634094, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.0858224630355835, "rewards_train/margins_1": 3.6223204135894775, "rewards_train/margins_2": 3.852029860019684, "step": 279 }, { "epoch": 0.84, "logps_train/policy_1_2": -209.90855407714844, "logps_train/policy_1_l": -177.18905639648438, "logps_train/policy_1_w": -132.04969787597656, "logps_train/policy_2_2": -150.00869750976562, "logps_train/policy_2_w": -204.111572265625, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.9827985763549805, "rewards_train/1-l": -2.1754488945007324, "rewards_train/1-w": 2.7270617485046387, "rewards_train/2-2": 2.5181493759155273, "rewards_train/2-w": -1.4002214670181274, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.902510643005371, "rewards_train/margins_1": 3.709860324859619, "rewards_train/margins_2": 3.918370842933655, "step": 279 }, { "epoch": 0.84, "learning_rate": 3.3705040885859975e-06, "loss": 0.8685, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -174.30398559570312, "logps_train/policy_1_l": -172.6187286376953, "logps_train/policy_1_w": -127.515380859375, "logps_train/policy_2_2": -115.333740234375, "logps_train/policy_2_w": -188.14218139648438, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.1241238117218018, "rewards_train/1-l": -1.996321201324463, "rewards_train/1-w": 2.8948495388031006, "rewards_train/2-2": 2.5030524730682373, "rewards_train/2-w": -0.6597269773483276, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.8911707401275635, "rewards_train/margins_1": 4.018973350524902, "rewards_train/margins_2": 3.162779450416565, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -176.42166137695312, "logps_train/policy_1_l": -141.4517822265625, "logps_train/policy_1_w": -131.89517211914062, "logps_train/policy_2_2": -128.01223754882812, "logps_train/policy_2_w": -174.84503173828125, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -0.4878695011138916, "rewards_train/1-l": -1.0449988842010498, "rewards_train/1-w": 2.1010897159576416, "rewards_train/2-2": 2.5870587825775146, "rewards_train/2-w": -0.6460999250411987, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.1460886001586914, "rewards_train/margins_1": 2.588959217071533, "rewards_train/margins_2": 3.2331587076187134, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -212.70635986328125, "logps_train/policy_1_l": -176.39215087890625, "logps_train/policy_1_w": -138.30755615234375, "logps_train/policy_2_2": -164.29383850097656, "logps_train/policy_2_w": -192.54168701171875, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.4766909182071686, "rewards_train/1-l": -1.5708552598953247, "rewards_train/1-w": 2.9973695278167725, "rewards_train/2-2": 2.5237419605255127, "rewards_train/2-w": -0.08356352150440216, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.568224787712097, "rewards_train/margins_1": 3.474060446023941, "rewards_train/margins_2": 2.607305482029915, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -190.2949981689453, "logps_train/policy_1_l": -148.81512451171875, "logps_train/policy_1_w": -142.73593139648438, "logps_train/policy_2_2": -131.3818817138672, "logps_train/policy_2_w": -216.37774658203125, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.1478588581085205, "rewards_train/1-l": -1.3811228275299072, "rewards_train/1-w": 3.004141330718994, "rewards_train/2-2": 2.371577501296997, "rewards_train/2-w": -1.917853832244873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.385264158248901, "rewards_train/margins_1": 4.152000188827515, "rewards_train/margins_2": 4.28943133354187, "step": 280 }, { "epoch": 0.84, "logps_train/policy_1_2": -171.89761352539062, "logps_train/policy_1_l": -168.66921997070312, "logps_train/policy_1_w": -128.31309509277344, "logps_train/policy_2_2": -109.55126953125, "logps_train/policy_2_w": -184.19944763183594, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.9725740551948547, "rewards_train/1-l": -1.6371362209320068, "rewards_train/1-w": 3.1339259147644043, "rewards_train/2-2": 2.9062013626098633, "rewards_train/2-w": -0.13439840078353882, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.771062135696411, "rewards_train/margins_1": 4.106499969959259, "rewards_train/margins_2": 3.040599763393402, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -148.43634033203125, "logps_train/policy_1_l": -112.98396301269531, "logps_train/policy_1_w": -71.32659149169922, "logps_train/policy_2_2": -105.72715759277344, "logps_train/policy_2_w": -111.46157836914062, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": -0.29968884587287903, "rewards_train/1-l": -1.2813799381256104, "rewards_train/1-w": 1.4065989255905151, "rewards_train/2-2": 2.0157604217529297, "rewards_train/2-w": -0.6176425218582153, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.6879788637161255, "rewards_train/margins_1": 1.7062877714633942, "rewards_train/margins_2": 2.633402943611145, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -216.50845336914062, "logps_train/policy_1_l": -236.68399047851562, "logps_train/policy_1_w": -187.94613647460938, "logps_train/policy_2_2": -164.93673706054688, "logps_train/policy_2_w": -254.41357421875, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": -0.19615823030471802, "rewards_train/1-l": -2.1223058700561523, "rewards_train/1-w": 3.196011543273926, "rewards_train/2-2": 2.769606590270996, "rewards_train/2-w": -0.6284669637680054, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.318317413330078, "rewards_train/margins_1": 3.392169773578644, "rewards_train/margins_2": 3.3980735540390015, "step": 281 }, { "epoch": 0.84, "logps_train/policy_1_2": -196.10696411132812, "logps_train/policy_1_l": -211.23214721679688, "logps_train/policy_1_w": -143.19638061523438, "logps_train/policy_2_2": -145.96832275390625, "logps_train/policy_2_w": -193.94873046875, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": 0.0916483998298645, "rewards_train/1-l": -1.303438663482666, "rewards_train/1-w": 2.4311442375183105, "rewards_train/2-2": 3.001995325088501, "rewards_train/2-w": -0.6497556567192078, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.7345829010009766, "rewards_train/margins_1": 2.339495837688446, "rewards_train/margins_2": 3.6517509818077087, "step": 281 }, { "epoch": 0.84, "learning_rate": 3.3473095425880795e-06, "loss": 1.0094, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -222.751708984375, "logps_train/policy_1_l": -227.06744384765625, "logps_train/policy_1_w": -174.84976196289062, "logps_train/policy_2_2": -154.45196533203125, "logps_train/policy_2_w": -243.29205322265625, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -0.7607161998748779, "rewards_train/1-l": -1.8108468055725098, "rewards_train/1-w": 3.0023536682128906, "rewards_train/2-2": 3.221992254257202, "rewards_train/2-w": -1.3140943050384521, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.8132004737854, "rewards_train/margins_1": 3.7630698680877686, "rewards_train/margins_2": 4.536086559295654, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -136.06280517578125, "logps_train/policy_1_l": -78.94329833984375, "logps_train/policy_1_w": -91.1417007446289, "logps_train/policy_2_2": -89.61005401611328, "logps_train/policy_2_w": -132.9930419921875, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -0.5801075100898743, "rewards_train/1-l": -0.7284117937088013, "rewards_train/1-w": 2.0569238662719727, "rewards_train/2-2": 2.258331298828125, "rewards_train/2-w": -0.609459638595581, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.785335659980774, "rewards_train/margins_1": 2.637031376361847, "rewards_train/margins_2": 2.867790937423706, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -153.76785278320312, "logps_train/policy_1_l": -159.47454833984375, "logps_train/policy_1_w": -98.88794708251953, "logps_train/policy_2_2": -117.96528625488281, "logps_train/policy_2_w": -142.61785888671875, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.25901341438293457, "rewards_train/1-l": -1.660979986190796, "rewards_train/1-w": 2.4755606651306152, "rewards_train/2-2": 1.9690965414047241, "rewards_train/2-w": -0.07584899663925171, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.136540651321411, "rewards_train/margins_1": 2.73457407951355, "rewards_train/margins_2": 2.044945538043976, "step": 282 }, { "epoch": 0.84, "logps_train/policy_1_2": -194.6478729248047, "logps_train/policy_1_l": -180.0846710205078, "logps_train/policy_1_w": -168.77000427246094, "logps_train/policy_2_2": -122.46218872070312, "logps_train/policy_2_w": -246.39492797851562, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.0327558517456055, "rewards_train/1-l": -1.6154985427856445, "rewards_train/1-w": 3.495070219039917, "rewards_train/2-2": 3.0623745918273926, "rewards_train/2-w": -1.5285561084747314, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.1105687618255615, "rewards_train/margins_1": 4.5278260707855225, "rewards_train/margins_2": 4.590930700302124, "step": 282 }, { "epoch": 0.85, "logps_train/policy_1_2": -153.5784454345703, "logps_train/policy_1_l": -149.33212280273438, "logps_train/policy_1_w": -106.0982666015625, "logps_train/policy_2_2": -110.40576171875, "logps_train/policy_2_w": -157.87063598632812, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.28479835391044617, "rewards_train/1-l": -1.4565526247024536, "rewards_train/1-w": 2.421326160430908, "rewards_train/2-2": 2.262938976287842, "rewards_train/2-w": -0.5964384078979492, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.877878785133362, "rewards_train/margins_1": 2.7061245143413544, "rewards_train/margins_2": 2.859377384185791, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -225.81097412109375, "logps_train/policy_1_l": -212.08474731445312, "logps_train/policy_1_w": -152.01324462890625, "logps_train/policy_2_2": -161.84002685546875, "logps_train/policy_2_w": -224.40087890625, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -0.7580503821372986, "rewards_train/1-l": -1.6180452108383179, "rewards_train/1-w": 2.6775331497192383, "rewards_train/2-2": 2.6154117584228516, "rewards_train/2-w": -1.1271965503692627, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.295578360557556, "rewards_train/margins_1": 3.435583531856537, "rewards_train/margins_2": 3.7426083087921143, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -171.51480102539062, "logps_train/policy_1_l": -133.68606567382812, "logps_train/policy_1_w": -125.81724548339844, "logps_train/policy_2_2": -122.84349060058594, "logps_train/policy_2_w": -197.71798706054688, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -0.8132404088973999, "rewards_train/1-l": -0.6654757261276245, "rewards_train/1-w": 3.6026086807250977, "rewards_train/2-2": 2.0510332584381104, "rewards_train/2-w": -0.27108514308929443, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.268084406852722, "rewards_train/margins_1": 4.415849089622498, "rewards_train/margins_2": 2.322118401527405, "step": 283 }, { "epoch": 0.85, "logps_train/policy_1_2": -170.01942443847656, "logps_train/policy_1_l": -141.88034057617188, "logps_train/policy_1_w": -114.45227813720703, "logps_train/policy_2_2": -117.21263885498047, "logps_train/policy_2_w": -160.45816040039062, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.7253797054290771, "rewards_train/1-l": -1.040376901626587, "rewards_train/1-w": 2.5705924034118652, "rewards_train/2-2": 2.4801034927368164, "rewards_train/2-w": -0.20167554914951324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.610969305038452, "rewards_train/margins_1": 3.2959721088409424, "rewards_train/margins_2": 2.6817790418863297, "step": 283 }, { "epoch": 0.85, "learning_rate": 3.32403230067252e-06, "loss": 1.0154, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -117.5568618774414, "logps_train/policy_1_l": -164.21353149414062, "logps_train/policy_1_w": -81.17012023925781, "logps_train/policy_2_2": -75.84938049316406, "logps_train/policy_2_w": -123.4046859741211, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": -0.7744367122650146, "rewards_train/1-l": -1.5977213382720947, "rewards_train/1-w": 1.4417773485183716, "rewards_train/2-2": 1.6449453830718994, "rewards_train/2-w": -0.6865628957748413, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0394986867904663, "rewards_train/margins_1": 2.2162140607833862, "rewards_train/margins_2": 2.3315082788467407, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -185.56710815429688, "logps_train/policy_1_l": -99.86796569824219, "logps_train/policy_1_w": -116.47140502929688, "logps_train/policy_2_2": -137.212646484375, "logps_train/policy_2_w": -152.31607055664062, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": 0.08391329646110535, "rewards_train/1-l": -0.7225028872489929, "rewards_train/1-w": 1.7214140892028809, "rewards_train/2-2": 3.177953004837036, "rewards_train/2-w": -0.46754422783851624, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.443916976451874, "rewards_train/margins_1": 1.6375007927417755, "rewards_train/margins_2": 3.6454972326755524, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -222.10919189453125, "logps_train/policy_1_l": -156.1249542236328, "logps_train/policy_1_w": -109.48997497558594, "logps_train/policy_2_2": -150.70928955078125, "logps_train/policy_2_w": -167.25634765625, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -1.335136890411377, "rewards_train/1-l": -0.8998977541923523, "rewards_train/1-w": 1.7122325897216797, "rewards_train/2-2": 3.0427427291870117, "rewards_train/2-w": -1.6713390350341797, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.612130343914032, "rewards_train/margins_1": 3.0473694801330566, "rewards_train/margins_2": 4.714081764221191, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -216.18080139160156, "logps_train/policy_1_l": -138.77896118164062, "logps_train/policy_1_w": -165.83775329589844, "logps_train/policy_2_2": -145.43377685546875, "logps_train/policy_2_w": -239.42105102539062, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": -1.155970573425293, "rewards_train/1-l": -1.1126617193222046, "rewards_train/1-w": 3.484583616256714, "rewards_train/2-2": 2.9663891792297363, "rewards_train/2-w": -0.5596827864646912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.5972453355789185, "rewards_train/margins_1": 4.640554189682007, "rewards_train/margins_2": 3.5260719656944275, "step": 284 }, { "epoch": 0.85, "logps_train/policy_1_2": -167.441162109375, "logps_train/policy_1_l": -155.06564331054688, "logps_train/policy_1_w": -110.6407470703125, "logps_train/policy_2_2": -125.53506469726562, "logps_train/policy_2_w": -159.65768432617188, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -0.5573967695236206, "rewards_train/1-l": -1.2151579856872559, "rewards_train/1-w": 2.2605342864990234, "rewards_train/2-2": 1.9132901430130005, "rewards_train/2-w": -0.4071744680404663, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4756922721862793, "rewards_train/margins_1": 2.817931056022644, "rewards_train/margins_2": 2.320464611053467, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -169.02198791503906, "logps_train/policy_1_l": -151.24453735351562, "logps_train/policy_1_w": -146.81138610839844, "logps_train/policy_2_2": -116.01060485839844, "logps_train/policy_2_w": -209.65089416503906, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -0.6717302203178406, "rewards_train/1-l": -1.3715236186981201, "rewards_train/1-w": 2.7948379516601562, "rewards_train/2-2": 2.413783073425293, "rewards_train/2-w": -0.9955574870109558, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.166361570358276, "rewards_train/margins_1": 3.466568171977997, "rewards_train/margins_2": 3.409340560436249, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -123.94477844238281, "logps_train/policy_1_l": -96.84427642822266, "logps_train/policy_1_w": -89.46994018554688, "logps_train/policy_2_2": -87.09064483642578, "logps_train/policy_2_w": -124.09320831298828, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": -0.6714311242103577, "rewards_train/1-l": -1.1786174774169922, "rewards_train/1-w": 1.5565211772918701, "rewards_train/2-2": 1.626091718673706, "rewards_train/2-w": -0.3757272958755493, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.7351386547088623, "rewards_train/margins_1": 2.227952301502228, "rewards_train/margins_2": 2.0018190145492554, "step": 285 }, { "epoch": 0.85, "logps_train/policy_1_2": -166.50608825683594, "logps_train/policy_1_l": -135.0317840576172, "logps_train/policy_1_w": -120.75921630859375, "logps_train/policy_2_2": -118.29822540283203, "logps_train/policy_2_w": -164.62901306152344, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.6435775756835938, "rewards_train/1-l": -1.186381459236145, "rewards_train/1-w": 2.236968755722046, "rewards_train/2-2": 2.031506299972534, "rewards_train/2-w": -0.3200302720069885, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.423350214958191, "rewards_train/margins_1": 2.8805463314056396, "rewards_train/margins_2": 2.3515365719795227, "step": 285 }, { "epoch": 0.86, "learning_rate": 3.300674634657094e-06, "loss": 1.0424, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -165.51153564453125, "logps_train/policy_1_l": -154.97787475585938, "logps_train/policy_1_w": -121.12759399414062, "logps_train/policy_2_2": -114.74317169189453, "logps_train/policy_2_w": -188.89651489257812, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -0.7408509254455566, "rewards_train/1-l": -1.4105786085128784, "rewards_train/1-w": 3.098324775695801, "rewards_train/2-2": 2.293944835662842, "rewards_train/2-w": -0.7901412844657898, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.508903384208679, "rewards_train/margins_1": 3.8391757011413574, "rewards_train/margins_2": 3.0840861201286316, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -222.4974365234375, "logps_train/policy_1_l": -167.57675170898438, "logps_train/policy_1_w": -171.92587280273438, "logps_train/policy_2_2": -160.07147216796875, "logps_train/policy_2_w": -240.0823211669922, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.396618127822876, "rewards_train/1-l": -1.4600181579589844, "rewards_train/1-w": 3.331631898880005, "rewards_train/2-2": 2.580352783203125, "rewards_train/2-w": -0.8793262243270874, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.791650056838989, "rewards_train/margins_1": 4.728250026702881, "rewards_train/margins_2": 3.4596790075302124, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -194.6627197265625, "logps_train/policy_1_l": -138.60995483398438, "logps_train/policy_1_w": -115.68635559082031, "logps_train/policy_2_2": -135.121337890625, "logps_train/policy_2_w": -163.89859008789062, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.167006015777588, "rewards_train/1-l": -1.8231538534164429, "rewards_train/1-w": 2.4274089336395264, "rewards_train/2-2": 2.5763421058654785, "rewards_train/2-w": -0.4849774241447449, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.250562787055969, "rewards_train/margins_1": 3.5944149494171143, "rewards_train/margins_2": 3.0613195300102234, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -134.14840698242188, "logps_train/policy_1_l": -113.65213012695312, "logps_train/policy_1_w": -99.67623901367188, "logps_train/policy_2_2": -90.47259521484375, "logps_train/policy_2_w": -142.538330078125, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.7558571100234985, "rewards_train/1-l": -1.1407005786895752, "rewards_train/1-w": 1.7685086727142334, "rewards_train/2-2": 1.90195894241333, "rewards_train/2-w": -0.8510972857475281, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.9092092514038086, "rewards_train/margins_1": 2.524365782737732, "rewards_train/margins_2": 2.753056228160858, "step": 286 }, { "epoch": 0.86, "logps_train/policy_1_2": -157.84552001953125, "logps_train/policy_1_l": -98.64806365966797, "logps_train/policy_1_w": -80.99036407470703, "logps_train/policy_2_2": -102.14605712890625, "logps_train/policy_2_w": -132.72503662109375, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -1.2681454420089722, "rewards_train/1-l": -0.8786739706993103, "rewards_train/1-w": 1.9948110580444336, "rewards_train/2-2": 1.835019588470459, "rewards_train/2-w": -0.6986757516860962, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.873485028743744, "rewards_train/margins_1": 3.2629565000534058, "rewards_train/margins_2": 2.533695340156555, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -228.89256286621094, "logps_train/policy_1_l": -151.71701049804688, "logps_train/policy_1_w": -141.93563842773438, "logps_train/policy_2_2": -158.86285400390625, "logps_train/policy_2_w": -208.29644775390625, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.3722642660140991, "rewards_train/1-l": -1.5905978679656982, "rewards_train/1-w": 2.6398348808288574, "rewards_train/2-2": 3.0845158100128174, "rewards_train/2-w": -1.126325011253357, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.230432748794556, "rewards_train/margins_1": 4.0120991468429565, "rewards_train/margins_2": 4.210840821266174, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -174.3925323486328, "logps_train/policy_1_l": -151.08749389648438, "logps_train/policy_1_w": -111.79373168945312, "logps_train/policy_2_2": -123.90335083007812, "logps_train/policy_2_w": -149.90707397460938, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -0.398237943649292, "rewards_train/1-l": -1.701228380203247, "rewards_train/1-w": 2.3764867782592773, "rewards_train/2-2": 2.5297818183898926, "rewards_train/2-w": 0.11437150835990906, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.077715158462524, "rewards_train/margins_1": 2.7747247219085693, "rewards_train/margins_2": 2.4154103100299835, "step": 287 }, { "epoch": 0.86, "logps_train/policy_1_2": -183.7010955810547, "logps_train/policy_1_l": -203.58383178710938, "logps_train/policy_1_w": -136.00697326660156, "logps_train/policy_2_2": -124.9347915649414, "logps_train/policy_2_w": -201.3541717529297, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -0.9134694337844849, "rewards_train/1-l": -2.1236183643341064, "rewards_train/1-w": 2.2071151733398438, "rewards_train/2-2": 2.560427665710449, "rewards_train/2-w": -1.4760420322418213, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.33073353767395, "rewards_train/margins_1": 3.1205846071243286, "rewards_train/margins_2": 4.0364696979522705, "step": 287 }, { "epoch": 0.86, "learning_rate": 3.2772388242088283e-06, "loss": 0.8668, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -140.60430908203125, "logps_train/policy_1_l": -96.9017333984375, "logps_train/policy_1_w": -91.05072784423828, "logps_train/policy_2_2": -93.97063446044922, "logps_train/policy_2_w": -136.1879119873047, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -1.446418046951294, "rewards_train/1-l": -0.5292847156524658, "rewards_train/1-w": 1.9697320461273193, "rewards_train/2-2": 1.3260817527770996, "rewards_train/2-w": -0.5711356401443481, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 2.499016761779785, "rewards_train/margins_1": 3.4161500930786133, "rewards_train/margins_2": 1.8972173929214478, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -229.9269256591797, "logps_train/policy_1_l": -236.111083984375, "logps_train/policy_1_w": -155.51507568359375, "logps_train/policy_2_2": -161.29647827148438, "logps_train/policy_2_w": -211.38526916503906, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": -0.7290199995040894, "rewards_train/1-l": -2.3930420875549316, "rewards_train/1-w": 3.555169105529785, "rewards_train/2-2": 3.414297580718994, "rewards_train/2-w": 0.3661606013774872, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.948211193084717, "rewards_train/margins_1": 4.2841891050338745, "rewards_train/margins_2": 3.048136979341507, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -201.15025329589844, "logps_train/policy_1_l": -157.62051391601562, "logps_train/policy_1_w": -152.09716796875, "logps_train/policy_2_2": -146.15184020996094, "logps_train/policy_2_w": -205.42642211914062, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -0.20721258223056793, "rewards_train/1-l": -1.7088274955749512, "rewards_train/1-w": 3.235546588897705, "rewards_train/2-2": 3.0801281929016113, "rewards_train/2-w": -0.2555316090583801, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.944374084472656, "rewards_train/margins_1": 3.442759171128273, "rewards_train/margins_2": 3.3356598019599915, "step": 288 }, { "epoch": 0.86, "logps_train/policy_1_2": -146.07472229003906, "logps_train/policy_1_l": -72.79219055175781, "logps_train/policy_1_w": -86.53163146972656, "logps_train/policy_2_2": -103.1900863647461, "logps_train/policy_2_w": -129.1277313232422, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -63.25, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -0.9934102892875671, "rewards_train/1-l": -0.9604206085205078, "rewards_train/1-w": 2.4428324699401855, "rewards_train/2-2": 1.8188819885253906, "rewards_train/2-w": -0.22742077708244324, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4032530784606934, "rewards_train/margins_1": 3.4362427592277527, "rewards_train/margins_2": 2.046302765607834, "step": 288 }, { "epoch": 0.87, "logps_train/policy_1_2": -180.30316162109375, "logps_train/policy_1_l": -112.64990234375, "logps_train/policy_1_w": -106.64231872558594, "logps_train/policy_2_2": -109.26922607421875, "logps_train/policy_2_w": -174.69773864746094, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -2.1625423431396484, "rewards_train/1-l": -1.5635251998901367, "rewards_train/1-w": 2.222486734390259, "rewards_train/2-2": 2.2486629486083984, "rewards_train/2-w": -1.7373522520065308, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7860119342803955, "rewards_train/margins_1": 4.385029077529907, "rewards_train/margins_2": 3.986015200614929, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -208.43714904785156, "logps_train/policy_1_l": -165.35806274414062, "logps_train/policy_1_w": -134.60296630859375, "logps_train/policy_2_2": -142.32940673828125, "logps_train/policy_2_w": -198.9530792236328, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -1.129652738571167, "rewards_train/1-l": -1.84342360496521, "rewards_train/1-w": 2.3740782737731934, "rewards_train/2-2": 2.859245777130127, "rewards_train/2-w": -1.7593698501586914, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.217501878738403, "rewards_train/margins_1": 3.5037310123443604, "rewards_train/margins_2": 4.618615627288818, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -190.62713623046875, "logps_train/policy_1_l": -199.87069702148438, "logps_train/policy_1_w": -132.30372619628906, "logps_train/policy_2_2": -136.77719116210938, "logps_train/policy_2_w": -184.06719970703125, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.1017776727676392, "rewards_train/1-l": -2.2626559734344482, "rewards_train/1-w": 2.3670876026153564, "rewards_train/2-2": 2.031363010406494, "rewards_train/2-w": -0.45047056674957275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.629743576049805, "rewards_train/margins_1": 3.4688652753829956, "rewards_train/margins_2": 2.481833577156067, "step": 289 }, { "epoch": 0.87, "logps_train/policy_1_2": -133.46524047851562, "logps_train/policy_1_l": -134.38534545898438, "logps_train/policy_1_w": -74.13630676269531, "logps_train/policy_2_2": -99.85824584960938, "logps_train/policy_2_w": -104.97587585449219, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -89.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -102.5, "rewards_train/1-2": -0.7043370604515076, "rewards_train/1-l": -1.323495626449585, "rewards_train/1-w": 1.463029146194458, "rewards_train/2-2": 1.367691159248352, "rewards_train/2-w": -0.23723554611206055, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.786524772644043, "rewards_train/margins_1": 2.1673662066459656, "rewards_train/margins_2": 1.6049267053604126, "step": 289 }, { "epoch": 0.87, "learning_rate": 3.253727156621508e-06, "loss": 1.0328, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -183.37266540527344, "logps_train/policy_1_l": -181.35498046875, "logps_train/policy_1_w": -130.42752075195312, "logps_train/policy_2_2": -131.89173889160156, "logps_train/policy_2_w": -174.32113647460938, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": 0.012538373470306396, "rewards_train/1-l": -1.6896476745605469, "rewards_train/1-w": 2.097872018814087, "rewards_train/2-2": 2.7440290451049805, "rewards_train/2-w": -0.5297708511352539, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.787519693374634, "rewards_train/margins_1": 2.0853336453437805, "rewards_train/margins_2": 3.2737998962402344, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -176.98733520507812, "logps_train/policy_1_l": -132.8041534423828, "logps_train/policy_1_w": -120.30987548828125, "logps_train/policy_2_2": -125.51824951171875, "logps_train/policy_2_w": -182.88584899902344, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.8155308961868286, "rewards_train/1-l": -1.3790483474731445, "rewards_train/1-w": 2.5928401947021484, "rewards_train/2-2": 2.13215970993042, "rewards_train/2-w": -1.2956165075302124, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.971888542175293, "rewards_train/margins_1": 3.408371090888977, "rewards_train/margins_2": 3.4277762174606323, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -141.7999267578125, "logps_train/policy_1_l": -107.47830963134766, "logps_train/policy_1_w": -100.7523422241211, "logps_train/policy_2_2": -100.82041931152344, "logps_train/policy_2_w": -149.50784301757812, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.438978374004364, "rewards_train/1-l": -1.2736122608184814, "rewards_train/1-w": 2.436875343322754, "rewards_train/2-2": 2.1072158813476562, "rewards_train/2-w": -0.6964876055717468, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.7104876041412354, "rewards_train/margins_1": 2.875853717327118, "rewards_train/margins_2": 2.803703486919403, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -160.95664978027344, "logps_train/policy_1_l": -167.96421813964844, "logps_train/policy_1_w": -84.27096557617188, "logps_train/policy_2_2": -105.11909484863281, "logps_train/policy_2_w": -118.0370101928711, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -1.4794542789459229, "rewards_train/1-l": -1.9522807598114014, "rewards_train/1-w": 1.8796411752700806, "rewards_train/2-2": 2.110356330871582, "rewards_train/2-w": -0.2972554862499237, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.831921935081482, "rewards_train/margins_1": 3.3590954542160034, "rewards_train/margins_2": 2.4076118171215057, "step": 290 }, { "epoch": 0.87, "logps_train/policy_1_2": -173.93341064453125, "logps_train/policy_1_l": -151.5802764892578, "logps_train/policy_1_w": -137.88223266601562, "logps_train/policy_2_2": -119.63995361328125, "logps_train/policy_2_w": -199.29498291015625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.0038883686065674, "rewards_train/1-l": -1.6108601093292236, "rewards_train/1-w": 2.762753963470459, "rewards_train/2-2": 2.3447933197021484, "rewards_train/2-w": -1.2220768928527832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.373614072799683, "rewards_train/margins_1": 3.7666423320770264, "rewards_train/margins_2": 3.5668702125549316, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -151.6942901611328, "logps_train/policy_1_l": -179.3535919189453, "logps_train/policy_1_w": -124.29501342773438, "logps_train/policy_2_2": -108.39763641357422, "logps_train/policy_2_w": -167.0033416748047, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": 0.03760215640068054, "rewards_train/1-l": -1.8203202486038208, "rewards_train/1-w": 2.2669830322265625, "rewards_train/2-2": 2.568439245223999, "rewards_train/2-w": 0.013337746262550354, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.087303280830383, "rewards_train/margins_1": 2.229380875825882, "rewards_train/margins_2": 2.5551014989614487, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -274.9936828613281, "logps_train/policy_1_l": -232.2935791015625, "logps_train/policy_1_w": -155.220947265625, "logps_train/policy_2_2": -181.23973083496094, "logps_train/policy_2_w": -237.77951049804688, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -2.2212438583374023, "rewards_train/1-l": -3.194202423095703, "rewards_train/1-w": 3.8357181549072266, "rewards_train/2-2": 3.5557141304016113, "rewards_train/2-w": -0.7099829912185669, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 7.02992057800293, "rewards_train/margins_1": 6.056962013244629, "rewards_train/margins_2": 4.265697121620178, "step": 291 }, { "epoch": 0.87, "logps_train/policy_1_2": -178.5894775390625, "logps_train/policy_1_l": -152.63601684570312, "logps_train/policy_1_w": -98.8375473022461, "logps_train/policy_2_2": -114.36094665527344, "logps_train/policy_2_w": -164.55271911621094, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -2.062206745147705, "rewards_train/1-l": -1.4367473125457764, "rewards_train/1-w": 2.1443700790405273, "rewards_train/2-2": 1.8851463794708252, "rewards_train/2-w": -2.032712697982788, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5811173915863037, "rewards_train/margins_1": 4.206576824188232, "rewards_train/margins_2": 3.9178590774536133, "step": 291 }, { "epoch": 0.87, "learning_rate": 3.23014192659244e-06, "loss": 1.04, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -126.37470245361328, "logps_train/policy_1_l": -106.18790435791016, "logps_train/policy_1_w": -100.50350189208984, "logps_train/policy_2_2": -77.4364013671875, "logps_train/policy_2_w": -145.57484436035156, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.7796568870544434, "rewards_train/1-l": -1.1096103191375732, "rewards_train/1-w": 2.346524715423584, "rewards_train/2-2": 1.9422974586486816, "rewards_train/2-w": -0.5715474486351013, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4561350345611572, "rewards_train/margins_1": 3.1261816024780273, "rewards_train/margins_2": 2.513844907283783, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -193.64627075195312, "logps_train/policy_1_l": -176.55535888671875, "logps_train/policy_1_w": -136.8470916748047, "logps_train/policy_2_2": -119.74727630615234, "logps_train/policy_2_w": -227.24298095703125, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.8232200145721436, "rewards_train/1-l": -2.3766298294067383, "rewards_train/1-w": 3.1051344871520996, "rewards_train/2-2": 2.5433883666992188, "rewards_train/2-w": -2.2211720943450928, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.481764316558838, "rewards_train/margins_1": 4.928354501724243, "rewards_train/margins_2": 4.7645604610443115, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -228.69894409179688, "logps_train/policy_1_l": -238.86322021484375, "logps_train/policy_1_w": -147.60618591308594, "logps_train/policy_2_2": -151.27871704101562, "logps_train/policy_2_w": -220.84420776367188, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.734737753868103, "rewards_train/1-l": -3.2477474212646484, "rewards_train/1-w": 2.3596935272216797, "rewards_train/2-2": 2.9627535343170166, "rewards_train/2-w": -1.8844209909439087, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.607440948486328, "rewards_train/margins_1": 4.094431281089783, "rewards_train/margins_2": 4.847174525260925, "step": 292 }, { "epoch": 0.87, "logps_train/policy_1_2": -160.4040069580078, "logps_train/policy_1_l": -186.0749969482422, "logps_train/policy_1_w": -144.78529357910156, "logps_train/policy_2_2": -114.48379516601562, "logps_train/policy_2_w": -201.11717224121094, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -0.9019244909286499, "rewards_train/1-l": -1.8933875560760498, "rewards_train/1-w": 2.533970594406128, "rewards_train/2-2": 2.0477139949798584, "rewards_train/2-w": -0.6433591842651367, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.427358150482178, "rewards_train/margins_1": 3.435895085334778, "rewards_train/margins_2": 2.691073179244995, "step": 292 }, { "epoch": 0.88, "logps_train/policy_1_2": -186.17123413085938, "logps_train/policy_1_l": -201.12893676757812, "logps_train/policy_1_w": -116.5099868774414, "logps_train/policy_2_2": -134.10311889648438, "logps_train/policy_2_w": -173.03173828125, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.6952489614486694, "rewards_train/1-l": -2.4769558906555176, "rewards_train/1-w": 3.0200953483581543, "rewards_train/2-2": 2.4670307636260986, "rewards_train/2-w": -0.3727051913738251, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.497051239013672, "rewards_train/margins_1": 3.7153443098068237, "rewards_train/margins_2": 2.8397359549999237, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -83.19496154785156, "logps_train/policy_1_l": -96.00341796875, "logps_train/policy_1_w": -109.74696350097656, "logps_train/policy_2_2": -57.40713882446289, "logps_train/policy_2_w": -149.5095977783203, "logps_train/ref_1_2": -81.5, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -69.5, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -0.1721750795841217, "rewards_train/1-l": -1.5202090740203857, "rewards_train/1-w": 1.4456713199615479, "rewards_train/2-2": 1.1959376335144043, "rewards_train/2-w": -1.0488784313201904, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.9658803939819336, "rewards_train/margins_1": 1.6178463995456696, "rewards_train/margins_2": 2.2448160648345947, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -222.78030395507812, "logps_train/policy_1_l": -190.48402404785156, "logps_train/policy_1_w": -169.10784912109375, "logps_train/policy_2_2": -157.24063110351562, "logps_train/policy_2_w": -243.05767822265625, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.0366237163543701, "rewards_train/1-l": -2.078552722930908, "rewards_train/1-w": 3.3435120582580566, "rewards_train/2-2": 2.7167563438415527, "rewards_train/2-w": -1.1690492630004883, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.422064781188965, "rewards_train/margins_1": 4.380135774612427, "rewards_train/margins_2": 3.885805606842041, "step": 293 }, { "epoch": 0.88, "logps_train/policy_1_2": -139.40773010253906, "logps_train/policy_1_l": -130.29190063476562, "logps_train/policy_1_w": -94.96157836914062, "logps_train/policy_2_2": -95.1063003540039, "logps_train/policy_2_w": -126.19551086425781, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -123.5, "rewards_train/1-2": -0.16241604089736938, "rewards_train/1-l": -1.6648107767105103, "rewards_train/1-w": 1.7785495519638062, "rewards_train/2-2": 2.3631372451782227, "rewards_train/2-w": -0.2599811553955078, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4433603286743164, "rewards_train/margins_1": 1.9409655928611755, "rewards_train/margins_2": 2.6231184005737305, "step": 293 }, { "epoch": 0.88, "learning_rate": 3.2064854359984976e-06, "loss": 1.0646, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -199.33209228515625, "logps_train/policy_1_l": -201.6068115234375, "logps_train/policy_1_w": -141.375, "logps_train/policy_2_2": -146.7361297607422, "logps_train/policy_2_w": -192.63949584960938, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.257476568222046, "rewards_train/1-l": -2.3860716819763184, "rewards_train/1-w": 2.276611804962158, "rewards_train/2-2": 1.9671586751937866, "rewards_train/2-w": -0.7325056791305542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.662683486938477, "rewards_train/margins_1": 3.534088373184204, "rewards_train/margins_2": 2.699664354324341, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -193.33547973632812, "logps_train/policy_1_l": -180.1675567626953, "logps_train/policy_1_w": -135.95103454589844, "logps_train/policy_2_2": -130.5065155029297, "logps_train/policy_2_w": -192.21359252929688, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.1495641469955444, "rewards_train/1-l": -1.9792554378509521, "rewards_train/1-w": 2.4791150093078613, "rewards_train/2-2": 2.7859699726104736, "rewards_train/2-w": -0.8830772638320923, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.4583704471588135, "rewards_train/margins_1": 3.6286791563034058, "rewards_train/margins_2": 3.669047236442566, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -137.81336975097656, "logps_train/policy_1_l": -114.32099914550781, "logps_train/policy_1_w": -68.85790252685547, "logps_train/policy_2_2": -86.80502319335938, "logps_train/policy_2_w": -109.05868530273438, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": -1.3326066732406616, "rewards_train/1-l": -1.2972893714904785, "rewards_train/1-w": 1.6314911842346191, "rewards_train/2-2": 2.006314277648926, "rewards_train/2-w": -0.6941269636154175, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.9287805557250977, "rewards_train/margins_1": 2.9640978574752808, "rewards_train/margins_2": 2.7004412412643433, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -180.74325561523438, "logps_train/policy_1_l": -191.31185913085938, "logps_train/policy_1_w": -127.19063568115234, "logps_train/policy_2_2": -117.13678741455078, "logps_train/policy_2_w": -201.9600830078125, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.3098735809326172, "rewards_train/1-l": -2.000229835510254, "rewards_train/1-w": 2.4578893184661865, "rewards_train/2-2": 2.2963309288024902, "rewards_train/2-w": -1.5874139070510864, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.45811915397644, "rewards_train/margins_1": 3.7677628993988037, "rewards_train/margins_2": 3.8837448358535767, "step": 294 }, { "epoch": 0.88, "logps_train/policy_1_2": -222.97804260253906, "logps_train/policy_1_l": -216.54269409179688, "logps_train/policy_1_w": -136.89962768554688, "logps_train/policy_2_2": -153.97384643554688, "logps_train/policy_2_w": -198.06423950195312, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.148585557937622, "rewards_train/1-l": -2.543721914291382, "rewards_train/1-w": 2.4842562675476074, "rewards_train/2-2": 2.8678486347198486, "rewards_train/2-w": -0.5236127376556396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.027978181838989, "rewards_train/margins_1": 3.6328418254852295, "rewards_train/margins_2": 3.3914613723754883, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -215.71432495117188, "logps_train/policy_1_l": -202.83837890625, "logps_train/policy_1_w": -162.51002502441406, "logps_train/policy_2_2": -146.51907348632812, "logps_train/policy_2_w": -245.84872436523438, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": -1.2909636497497559, "rewards_train/1-l": -2.193800210952759, "rewards_train/1-w": 2.913060188293457, "rewards_train/2-2": 2.845944404602051, "rewards_train/2-w": -1.8461999893188477, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.106860399246216, "rewards_train/margins_1": 4.204023838043213, "rewards_train/margins_2": 4.692144393920898, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -151.61752319335938, "logps_train/policy_1_l": -109.24630737304688, "logps_train/policy_1_w": -102.6634521484375, "logps_train/policy_2_2": -106.3927993774414, "logps_train/policy_2_w": -153.90460205078125, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -0.5305014848709106, "rewards_train/1-l": -1.7537572383880615, "rewards_train/1-w": 2.3817014694213867, "rewards_train/2-2": 2.470876693725586, "rewards_train/2-w": -0.39133769273757935, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.135458707809448, "rewards_train/margins_1": 2.9122029542922974, "rewards_train/margins_2": 2.8622143864631653, "step": 295 }, { "epoch": 0.88, "logps_train/policy_1_2": -191.283203125, "logps_train/policy_1_l": -184.67990112304688, "logps_train/policy_1_w": -144.37191772460938, "logps_train/policy_2_2": -134.6832733154297, "logps_train/policy_2_w": -202.69740295410156, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.9220708012580872, "rewards_train/1-l": -1.9610936641693115, "rewards_train/1-w": 2.3202288150787354, "rewards_train/2-2": 2.609602689743042, "rewards_train/2-w": -1.1900533437728882, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.281322479248047, "rewards_train/margins_1": 3.2422996163368225, "rewards_train/margins_2": 3.79965603351593, "step": 295 }, { "epoch": 0.89, "learning_rate": 3.182759993671458e-06, "loss": 0.9239, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -168.38723754882812, "logps_train/policy_1_l": -152.20428466796875, "logps_train/policy_1_w": -90.77607727050781, "logps_train/policy_2_2": -116.239990234375, "logps_train/policy_2_w": -135.54684448242188, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -1.0238796472549438, "rewards_train/1-l": -1.640447735786438, "rewards_train/1-w": 1.9907513856887817, "rewards_train/2-2": 2.3642821311950684, "rewards_train/2-w": -0.8031225800514221, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6311991214752197, "rewards_train/margins_1": 3.0146310329437256, "rewards_train/margins_2": 3.1674047112464905, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -146.32278442382812, "logps_train/policy_1_l": -134.73716735839844, "logps_train/policy_1_w": -100.26385498046875, "logps_train/policy_2_2": -109.83790588378906, "logps_train/policy_2_w": -144.56590270996094, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -0.1494661271572113, "rewards_train/1-l": -1.5050643682479858, "rewards_train/1-w": 1.6787906885147095, "rewards_train/2-2": 2.128319025039673, "rewards_train/2-w": -0.81684410572052, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.1838550567626953, "rewards_train/margins_1": 1.8282568156719208, "rewards_train/margins_2": 2.945163130760193, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -235.651123046875, "logps_train/policy_1_l": -188.4324951171875, "logps_train/policy_1_w": -120.8265609741211, "logps_train/policy_2_2": -166.83660888671875, "logps_train/policy_2_w": -185.0601806640625, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.4987056255340576, "rewards_train/1-l": -1.9273324012756348, "rewards_train/1-w": 2.227109432220459, "rewards_train/2-2": 2.7581377029418945, "rewards_train/2-w": -1.465001106262207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.154441833496094, "rewards_train/margins_1": 3.7258150577545166, "rewards_train/margins_2": 4.223138809204102, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -156.14915466308594, "logps_train/policy_1_l": -164.7525634765625, "logps_train/policy_1_w": -139.29122924804688, "logps_train/policy_2_2": -108.4749755859375, "logps_train/policy_2_w": -180.40399169921875, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.5746816396713257, "rewards_train/1-l": -1.4097785949707031, "rewards_train/1-w": 1.8318150043487549, "rewards_train/2-2": 2.4032845497131348, "rewards_train/2-w": -0.7153982520103455, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.241593599319458, "rewards_train/margins_1": 2.4064966440200806, "rewards_train/margins_2": 3.1186828017234802, "step": 296 }, { "epoch": 0.89, "logps_train/policy_1_2": -156.56887817382812, "logps_train/policy_1_l": -158.79278564453125, "logps_train/policy_1_w": -106.81727600097656, "logps_train/policy_2_2": -112.37542724609375, "logps_train/policy_2_w": -155.40830993652344, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.6920444965362549, "rewards_train/1-l": -1.8069394826889038, "rewards_train/1-w": 1.8894388675689697, "rewards_train/2-2": 1.9804258346557617, "rewards_train/2-w": -0.7695426940917969, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6963783502578735, "rewards_train/margins_1": 2.5814833641052246, "rewards_train/margins_2": 2.7499685287475586, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -222.49752807617188, "logps_train/policy_1_l": -159.30711364746094, "logps_train/policy_1_w": -132.51058959960938, "logps_train/policy_2_2": -142.58921813964844, "logps_train/policy_2_w": -210.95074462890625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -2.4747517108917236, "rewards_train/1-l": -1.7098124027252197, "rewards_train/1-w": 2.562222957611084, "rewards_train/2-2": 2.609095811843872, "rewards_train/2-w": -2.4076225757598877, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.272035360336304, "rewards_train/margins_1": 5.036974668502808, "rewards_train/margins_2": 5.01671838760376, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -200.31101989746094, "logps_train/policy_1_l": -155.4211883544922, "logps_train/policy_1_w": -133.77944946289062, "logps_train/policy_2_2": -146.81033325195312, "logps_train/policy_2_w": -188.4405975341797, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -0.6221665143966675, "rewards_train/1-l": -1.2475876808166504, "rewards_train/1-w": 2.8099451065063477, "rewards_train/2-2": 2.7660117149353027, "rewards_train/2-w": -0.6440601348876953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.057532787322998, "rewards_train/margins_1": 3.432111620903015, "rewards_train/margins_2": 3.410071849822998, "step": 297 }, { "epoch": 0.89, "logps_train/policy_1_2": -212.5198516845703, "logps_train/policy_1_l": -160.32627868652344, "logps_train/policy_1_w": -99.47456359863281, "logps_train/policy_2_2": -145.05467224121094, "logps_train/policy_2_w": -164.08627319335938, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -1.2840168476104736, "rewards_train/1-l": -1.6499619483947754, "rewards_train/1-w": 2.3853564262390137, "rewards_train/2-2": 3.0226573944091797, "rewards_train/2-w": -1.2207362651824951, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.035318374633789, "rewards_train/margins_1": 3.6693732738494873, "rewards_train/margins_2": 4.243393659591675, "step": 297 }, { "epoch": 0.89, "learning_rate": 3.1589679151726693e-06, "loss": 0.9226, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -107.30996704101562, "logps_train/policy_1_l": -82.87638854980469, "logps_train/policy_1_w": -73.39706420898438, "logps_train/policy_2_2": -71.46150207519531, "logps_train/policy_2_w": -109.82424926757812, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -104.5, "rewards_train/1-2": -0.6044338941574097, "rewards_train/1-l": -0.8169360160827637, "rewards_train/1-w": 1.5028712749481201, "rewards_train/2-2": 1.588297963142395, "rewards_train/2-w": -0.5136744976043701, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.319807291030884, "rewards_train/margins_1": 2.10730516910553, "rewards_train/margins_2": 2.101972460746765, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -232.6138916015625, "logps_train/policy_1_l": -187.7763214111328, "logps_train/policy_1_w": -160.90089416503906, "logps_train/policy_2_2": -171.08816528320312, "logps_train/policy_2_w": -223.75157165527344, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.2465467005968094, "rewards_train/1-l": -1.5104939937591553, "rewards_train/1-w": 3.689988613128662, "rewards_train/2-2": 3.307590961456299, "rewards_train/2-w": -0.19976699352264404, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.200482606887817, "rewards_train/margins_1": 3.9365353137254715, "rewards_train/margins_2": 3.507357954978943, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -178.25845336914062, "logps_train/policy_1_l": -189.65213012695312, "logps_train/policy_1_w": -133.3024444580078, "logps_train/policy_2_2": -124.83109283447266, "logps_train/policy_2_w": -199.61688232421875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.5860003232955933, "rewards_train/1-l": -2.066777229309082, "rewards_train/1-w": 2.855009078979492, "rewards_train/2-2": 3.0504839420318604, "rewards_train/2-w": -1.1519231796264648, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.921786308288574, "rewards_train/margins_1": 3.4410094022750854, "rewards_train/margins_2": 4.202407121658325, "step": 298 }, { "epoch": 0.89, "logps_train/policy_1_2": -139.87344360351562, "logps_train/policy_1_l": -153.86050415039062, "logps_train/policy_1_w": -129.48582458496094, "logps_train/policy_2_2": -91.39016723632812, "logps_train/policy_2_w": -186.6610107421875, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -0.7471105456352234, "rewards_train/1-l": -1.2889807224273682, "rewards_train/1-w": 2.5527851581573486, "rewards_train/2-2": 1.8953583240509033, "rewards_train/2-w": -0.9869994521141052, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.841765880584717, "rewards_train/margins_1": 3.299895703792572, "rewards_train/margins_2": 2.8823577761650085, "step": 298 }, { "epoch": 0.9, "logps_train/policy_1_2": -143.08016967773438, "logps_train/policy_1_l": -103.10142517089844, "logps_train/policy_1_w": -106.40220642089844, "logps_train/policy_2_2": -92.2772216796875, "logps_train/policy_2_w": -172.27981567382812, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.3600198030471802, "rewards_train/1-l": -1.1251327991485596, "rewards_train/1-w": 2.4302868843078613, "rewards_train/2-2": 1.9328731298446655, "rewards_train/2-w": -1.535404086112976, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.555419683456421, "rewards_train/margins_1": 3.7903066873550415, "rewards_train/margins_2": 3.4682772159576416, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -157.25164794921875, "logps_train/policy_1_l": -153.47140502929688, "logps_train/policy_1_w": -150.91143798828125, "logps_train/policy_2_2": -102.21633911132812, "logps_train/policy_2_w": -213.6963653564453, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.2521183490753174, "rewards_train/1-l": -1.2701377868652344, "rewards_train/1-w": 0.9235043525695801, "rewards_train/2-2": 1.793796181678772, "rewards_train/2-w": -2.980379104614258, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.1936421394348145, "rewards_train/margins_1": 2.1756227016448975, "rewards_train/margins_2": 4.77417528629303, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -191.63877868652344, "logps_train/policy_1_l": -167.85284423828125, "logps_train/policy_1_w": -118.21485900878906, "logps_train/policy_2_2": -133.50997924804688, "logps_train/policy_2_w": -173.11907958984375, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.8826282024383545, "rewards_train/1-l": -1.8684884309768677, "rewards_train/1-w": 2.1734366416931152, "rewards_train/2-2": 2.4388465881347656, "rewards_train/2-w": -1.1056586503982544, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.041925072669983, "rewards_train/margins_1": 3.0560648441314697, "rewards_train/margins_2": 3.54450523853302, "step": 299 }, { "epoch": 0.9, "logps_train/policy_1_2": -185.8775634765625, "logps_train/policy_1_l": -147.36209106445312, "logps_train/policy_1_w": -115.88890838623047, "logps_train/policy_2_2": -126.11593627929688, "logps_train/policy_2_w": -172.04229736328125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.015881061553955, "rewards_train/1-l": -1.656327247619629, "rewards_train/1-w": 2.5929455757141113, "rewards_train/2-2": 2.989969491958618, "rewards_train/2-w": -0.9215145707130432, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.24927282333374, "rewards_train/margins_1": 3.6088266372680664, "rewards_train/margins_2": 3.9114840626716614, "step": 299 }, { "epoch": 0.9, "learning_rate": 3.1351115225670483e-06, "loss": 1.229, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -142.93002319335938, "logps_train/policy_1_l": -117.52227783203125, "logps_train/policy_1_w": -100.1101303100586, "logps_train/policy_2_2": -103.47369384765625, "logps_train/policy_2_w": -142.06463623046875, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.47386080026626587, "rewards_train/1-l": -0.8707342147827148, "rewards_train/1-w": 1.917746663093567, "rewards_train/2-2": 1.9842712879180908, "rewards_train/2-w": -0.7866887450218201, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.7884808778762817, "rewards_train/margins_1": 2.3916074633598328, "rewards_train/margins_2": 2.770960032939911, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -162.40182495117188, "logps_train/policy_1_l": -163.4838104248047, "logps_train/policy_1_w": -87.48585510253906, "logps_train/policy_2_2": -122.16709899902344, "logps_train/policy_2_w": -129.24526977539062, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -0.7131321430206299, "rewards_train/1-l": -2.3166425228118896, "rewards_train/1-w": 2.0539541244506836, "rewards_train/2-2": 1.7320696115493774, "rewards_train/2-w": -0.3616354167461395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.370596647262573, "rewards_train/margins_1": 2.7670862674713135, "rewards_train/margins_2": 2.093705028295517, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -207.08880615234375, "logps_train/policy_1_l": -218.8592529296875, "logps_train/policy_1_w": -177.8642578125, "logps_train/policy_2_2": -142.83453369140625, "logps_train/policy_2_w": -240.35885620117188, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.3963803052902222, "rewards_train/1-l": -2.347743034362793, "rewards_train/1-w": 2.405761241912842, "rewards_train/2-2": 2.4499449729919434, "rewards_train/2-w": -1.446041226387024, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.753504276275635, "rewards_train/margins_1": 3.802141547203064, "rewards_train/margins_2": 3.8959861993789673, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -180.0716094970703, "logps_train/policy_1_l": -171.50119018554688, "logps_train/policy_1_w": -131.41175842285156, "logps_train/policy_2_2": -122.7449951171875, "logps_train/policy_2_w": -181.5770263671875, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.9584316611289978, "rewards_train/1-l": -1.7750216722488403, "rewards_train/1-w": 2.3604841232299805, "rewards_train/2-2": 2.436339855194092, "rewards_train/2-w": -0.9854610562324524, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.135505795478821, "rewards_train/margins_1": 3.3189157843589783, "rewards_train/margins_2": 3.421800911426544, "step": 300 }, { "epoch": 0.9, "logps_train/policy_1_2": -190.31544494628906, "logps_train/policy_1_l": -242.3638458251953, "logps_train/policy_1_w": -156.80889892578125, "logps_train/policy_2_2": -126.97329711914062, "logps_train/policy_2_w": -235.28057861328125, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -0.9401384592056274, "rewards_train/1-l": -2.703498363494873, "rewards_train/1-w": 2.8870770931243896, "rewards_train/2-2": 2.855210065841675, "rewards_train/2-w": -1.5272762775421143, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.590575456619263, "rewards_train/margins_1": 3.827215552330017, "rewards_train/margins_2": 4.382486343383789, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -150.60716247558594, "logps_train/policy_1_l": -140.70791625976562, "logps_train/policy_1_w": -123.04581451416016, "logps_train/policy_2_2": -115.91266632080078, "logps_train/policy_2_w": -163.02786254882812, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": 0.2529545724391937, "rewards_train/1-l": -1.4125392436981201, "rewards_train/1-w": 2.600106716156006, "rewards_train/2-2": 2.4638118743896484, "rewards_train/2-w": -0.002005741000175476, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.012645959854126, "rewards_train/margins_1": 2.347152143716812, "rewards_train/margins_2": 2.465817615389824, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -162.70758056640625, "logps_train/policy_1_l": -104.409912109375, "logps_train/policy_1_w": -105.70582580566406, "logps_train/policy_2_2": -106.09799194335938, "logps_train/policy_2_w": -155.20016479492188, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -1.524543046951294, "rewards_train/1-l": -0.9732908010482788, "rewards_train/1-w": 1.9377429485321045, "rewards_train/2-2": 2.178823471069336, "rewards_train/2-w": -1.6369125843048096, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.6875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.9110337495803833, "rewards_train/margins_1": 3.4622859954833984, "rewards_train/margins_2": 3.8157360553741455, "step": 301 }, { "epoch": 0.9, "logps_train/policy_1_2": -202.4709930419922, "logps_train/policy_1_l": -148.98077392578125, "logps_train/policy_1_w": -125.14907836914062, "logps_train/policy_2_2": -135.8721466064453, "logps_train/policy_2_w": -194.7888946533203, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.8769824504852295, "rewards_train/1-l": -1.1863579750061035, "rewards_train/1-w": 2.529232978820801, "rewards_train/2-2": 2.386808395385742, "rewards_train/2-w": -1.8374826908111572, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.7155909538269043, "rewards_train/margins_1": 4.40621542930603, "rewards_train/margins_2": 4.224291086196899, "step": 301 }, { "epoch": 0.9, "learning_rate": 3.111193144196457e-06, "loss": 1.0028, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -168.25384521484375, "logps_train/policy_1_l": -137.5250244140625, "logps_train/policy_1_w": -84.5234146118164, "logps_train/policy_2_2": -112.26873779296875, "logps_train/policy_2_w": -131.756591796875, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": -1.6433544158935547, "rewards_train/1-l": -1.3628549575805664, "rewards_train/1-w": 1.1387232542037964, "rewards_train/2-2": 1.7918758392333984, "rewards_train/2-w": -1.4322986602783203, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.501578211784363, "rewards_train/margins_1": 2.782077670097351, "rewards_train/margins_2": 3.2241744995117188, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -212.16139221191406, "logps_train/policy_1_l": -235.50697326660156, "logps_train/policy_1_w": -168.31890869140625, "logps_train/policy_2_2": -139.26596069335938, "logps_train/policy_2_w": -241.8257598876953, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": -1.5075461864471436, "rewards_train/1-l": -3.172865867614746, "rewards_train/1-w": 2.817328453063965, "rewards_train/2-2": 2.750356674194336, "rewards_train/2-w": -1.8263261318206787, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.990194320678711, "rewards_train/margins_1": 4.324874639511108, "rewards_train/margins_2": 4.576682806015015, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -212.83029174804688, "logps_train/policy_1_l": -228.89987182617188, "logps_train/policy_1_w": -180.02059936523438, "logps_train/policy_2_2": -160.30972290039062, "logps_train/policy_2_w": -250.58984375, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": -0.4757544994354248, "rewards_train/1-l": -1.7098103761672974, "rewards_train/1-w": 3.3049721717834473, "rewards_train/2-2": 2.9392175674438477, "rewards_train/2-w": -0.9324210286140442, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.014782547950745, "rewards_train/margins_1": 3.780726671218872, "rewards_train/margins_2": 3.871638596057892, "step": 302 }, { "epoch": 0.9, "logps_train/policy_1_2": -118.45060729980469, "logps_train/policy_1_l": -145.63232421875, "logps_train/policy_1_w": -122.55592346191406, "logps_train/policy_2_2": -82.58580780029297, "logps_train/policy_2_w": -183.41769409179688, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.5782152414321899, "rewards_train/1-l": -1.295849084854126, "rewards_train/1-w": 2.516087055206299, "rewards_train/2-2": 1.697425127029419, "rewards_train/2-w": -1.4751689434051514, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.811936140060425, "rewards_train/margins_1": 3.0943022966384888, "rewards_train/margins_2": 3.1725940704345703, "step": 302 }, { "epoch": 0.91, "logps_train/policy_1_2": -227.70220947265625, "logps_train/policy_1_l": -153.76513671875, "logps_train/policy_1_w": -134.73904418945312, "logps_train/policy_2_2": -148.90972900390625, "logps_train/policy_2_w": -197.29376220703125, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -2.0116262435913086, "rewards_train/1-l": -1.3172627687454224, "rewards_train/1-w": 2.6070261001586914, "rewards_train/2-2": 3.016838788986206, "rewards_train/2-w": -1.160641074180603, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9242888689041138, "rewards_train/margins_1": 4.61865234375, "rewards_train/margins_2": 4.177479863166809, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -181.92483520507812, "logps_train/policy_1_l": -200.72195434570312, "logps_train/policy_1_w": -133.4931640625, "logps_train/policy_2_2": -126.39590454101562, "logps_train/policy_2_w": -199.29644775390625, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.7397493124008179, "rewards_train/1-l": -2.1185319423675537, "rewards_train/1-w": 2.823535919189453, "rewards_train/2-2": 2.4979090690612793, "rewards_train/2-w": -1.1148020029067993, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.942067861557007, "rewards_train/margins_1": 3.563285231590271, "rewards_train/margins_2": 3.6127110719680786, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -183.99229431152344, "logps_train/policy_1_l": -124.708984375, "logps_train/policy_1_w": -122.50041198730469, "logps_train/policy_2_2": -132.78578186035156, "logps_train/policy_2_w": -165.21104431152344, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.0426740646362305, "rewards_train/1-l": -1.3854732513427734, "rewards_train/1-w": 1.8773754835128784, "rewards_train/2-2": 2.4861459732055664, "rewards_train/2-w": -0.5768174529075623, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.262848734855652, "rewards_train/margins_1": 2.920049548149109, "rewards_train/margins_2": 3.0629634261131287, "step": 303 }, { "epoch": 0.91, "logps_train/policy_1_2": -196.84222412109375, "logps_train/policy_1_l": -170.77101135253906, "logps_train/policy_1_w": -137.70455932617188, "logps_train/policy_2_2": -129.36390686035156, "logps_train/policy_2_w": -187.6614532470703, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.7111754417419434, "rewards_train/1-l": -1.9255390167236328, "rewards_train/1-w": 2.3172388076782227, "rewards_train/2-2": 2.4050159454345703, "rewards_train/2-w": -0.7089199423789978, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 4.2427778244018555, "rewards_train/margins_1": 4.028414249420166, "rewards_train/margins_2": 3.113935887813568, "step": 303 }, { "epoch": 0.91, "learning_rate": 3.08721511445246e-06, "loss": 0.921, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -210.34332275390625, "logps_train/policy_1_l": -145.4271697998047, "logps_train/policy_1_w": -110.51338195800781, "logps_train/policy_2_2": -140.23193359375, "logps_train/policy_2_w": -165.26736450195312, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.7161188125610352, "rewards_train/1-l": -1.400303840637207, "rewards_train/1-w": 2.190441131591797, "rewards_train/2-2": 2.761230707168579, "rewards_train/2-w": -1.2181427478790283, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.590744972229004, "rewards_train/margins_1": 3.906559944152832, "rewards_train/margins_2": 3.9793734550476074, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -178.2301788330078, "logps_train/policy_1_l": -174.60418701171875, "logps_train/policy_1_w": -124.40576171875, "logps_train/policy_2_2": -127.91476440429688, "logps_train/policy_2_w": -190.13047790527344, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.7909874320030212, "rewards_train/1-l": -2.227409601211548, "rewards_train/1-w": 2.228955030441284, "rewards_train/2-2": 2.1346957683563232, "rewards_train/2-w": -1.5661731958389282, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.456364631652832, "rewards_train/margins_1": 3.0199424624443054, "rewards_train/margins_2": 3.7008689641952515, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -168.63851928710938, "logps_train/policy_1_l": -148.950927734375, "logps_train/policy_1_w": -119.34378051757812, "logps_train/policy_2_2": -122.32080841064453, "logps_train/policy_2_w": -173.39303588867188, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.6335775256156921, "rewards_train/1-l": -1.2948966026306152, "rewards_train/1-w": 1.7297810316085815, "rewards_train/2-2": 1.8684074878692627, "rewards_train/2-w": -1.6410608291625977, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.0246776342391968, "rewards_train/margins_1": 2.3633585572242737, "rewards_train/margins_2": 3.5094683170318604, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -197.4188690185547, "logps_train/policy_1_l": -196.8333740234375, "logps_train/policy_1_w": -121.11956787109375, "logps_train/policy_2_2": -135.68698120117188, "logps_train/policy_2_w": -178.26455688476562, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.2403242588043213, "rewards_train/1-l": -2.605581045150757, "rewards_train/1-w": 2.7153875827789307, "rewards_train/2-2": 2.4129421710968018, "rewards_train/2-w": -0.7971580624580383, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.3209686279296875, "rewards_train/margins_1": 3.955711841583252, "rewards_train/margins_2": 3.21010023355484, "step": 304 }, { "epoch": 0.91, "logps_train/policy_1_2": -177.43557739257812, "logps_train/policy_1_l": -126.32366180419922, "logps_train/policy_1_w": -132.7772216796875, "logps_train/policy_2_2": -132.8383331298828, "logps_train/policy_2_w": -177.76190185546875, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.3876979947090149, "rewards_train/1-l": -1.3876392841339111, "rewards_train/1-w": 2.0492300987243652, "rewards_train/2-2": 2.341947555541992, "rewards_train/2-w": -0.5683778524398804, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4368693828582764, "rewards_train/margins_1": 2.43692809343338, "rewards_train/margins_2": 2.9103254079818726, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -184.47796630859375, "logps_train/policy_1_l": -106.00178527832031, "logps_train/policy_1_w": -100.83238983154297, "logps_train/policy_2_2": -121.78223419189453, "logps_train/policy_2_w": -161.4182891845703, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.5045595169067383, "rewards_train/1-l": -0.8924633860588074, "rewards_train/1-w": 2.20999813079834, "rewards_train/2-2": 2.482860565185547, "rewards_train/2-w": -1.3549644947052002, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.102461516857147, "rewards_train/margins_1": 3.714557647705078, "rewards_train/margins_2": 3.837825059890747, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -178.10328674316406, "logps_train/policy_1_l": -106.98179626464844, "logps_train/policy_1_w": -85.87797546386719, "logps_train/policy_2_2": -123.45993041992188, "logps_train/policy_2_w": -132.247802734375, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -0.9099382758140564, "rewards_train/1-l": -1.2646350860595703, "rewards_train/1-w": 1.9243122339248657, "rewards_train/2-2": 2.4951205253601074, "rewards_train/2-w": -0.47165587544441223, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.188947319984436, "rewards_train/margins_1": 2.834250509738922, "rewards_train/margins_2": 2.9667764008045197, "step": 305 }, { "epoch": 0.91, "logps_train/policy_1_2": -176.52146911621094, "logps_train/policy_1_l": -162.38934326171875, "logps_train/policy_1_w": -96.65718078613281, "logps_train/policy_2_2": -116.33111572265625, "logps_train/policy_2_w": -154.51824951171875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.0342278480529785, "rewards_train/1-l": -1.8930737972259521, "rewards_train/1-w": 1.7861990928649902, "rewards_train/2-2": 2.5999693870544434, "rewards_train/2-w": -1.4650084972381592, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6792728900909424, "rewards_train/margins_1": 2.8204269409179688, "rewards_train/margins_2": 4.0649778842926025, "step": 305 }, { "epoch": 0.92, "learning_rate": 3.0631797735484877e-06, "loss": 1.1705, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -253.40899658203125, "logps_train/policy_1_l": -202.42791748046875, "logps_train/policy_1_w": -162.70700073242188, "logps_train/policy_2_2": -177.06784057617188, "logps_train/policy_2_w": -232.84283447265625, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -2.006134033203125, "rewards_train/1-l": -1.9838066101074219, "rewards_train/1-w": 3.062112331390381, "rewards_train/2-2": 2.843996524810791, "rewards_train/2-w": -1.4522533416748047, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.045918941497803, "rewards_train/margins_1": 5.068246364593506, "rewards_train/margins_2": 4.296249866485596, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -128.1678009033203, "logps_train/policy_1_l": -98.6283187866211, "logps_train/policy_1_w": -78.68515014648438, "logps_train/policy_2_2": -82.44542694091797, "logps_train/policy_2_w": -124.24935913085938, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": -0.9824054837226868, "rewards_train/1-l": -1.6076449155807495, "rewards_train/1-w": 2.2994532585144043, "rewards_train/2-2": 1.7500863075256348, "rewards_train/2-w": -0.7889986038208008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.907098174095154, "rewards_train/margins_1": 3.281858742237091, "rewards_train/margins_2": 2.5390849113464355, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -197.0485382080078, "logps_train/policy_1_l": -182.79214477539062, "logps_train/policy_1_w": -158.57415771484375, "logps_train/policy_2_2": -133.4656219482422, "logps_train/policy_2_w": -238.57156372070312, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -1.2446978092193604, "rewards_train/1-l": -2.2046523094177246, "rewards_train/1-w": 3.73789644241333, "rewards_train/2-2": 2.515547037124634, "rewards_train/2-w": -1.6102819442749023, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.942548751831055, "rewards_train/margins_1": 4.98259425163269, "rewards_train/margins_2": 4.125828981399536, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -180.679443359375, "logps_train/policy_1_l": -158.00021362304688, "logps_train/policy_1_w": -143.73770141601562, "logps_train/policy_2_2": -127.12796783447266, "logps_train/policy_2_w": -209.45172119140625, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.3476331233978271, "rewards_train/1-l": -1.7047085762023926, "rewards_train/1-w": 3.0869717597961426, "rewards_train/2-2": 2.025289535522461, "rewards_train/2-w": -1.1623578071594238, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.791680335998535, "rewards_train/margins_1": 4.43460488319397, "rewards_train/margins_2": 3.1876473426818848, "step": 306 }, { "epoch": 0.92, "logps_train/policy_1_2": -150.02102661132812, "logps_train/policy_1_l": -158.32196044921875, "logps_train/policy_1_w": -121.64717864990234, "logps_train/policy_2_2": -106.82389831542969, "logps_train/policy_2_w": -181.3160400390625, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.09702498465776443, "rewards_train/1-l": -2.016327381134033, "rewards_train/1-w": 2.729813575744629, "rewards_train/2-2": 2.5665600299835205, "rewards_train/2-w": -0.802503228187561, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.746140956878662, "rewards_train/margins_1": 2.8268385604023933, "rewards_train/margins_2": 3.3690632581710815, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -241.31451416015625, "logps_train/policy_1_l": -227.27493286132812, "logps_train/policy_1_w": -128.02413940429688, "logps_train/policy_2_2": -159.99957275390625, "logps_train/policy_2_w": -208.73236083984375, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.9736385345458984, "rewards_train/1-l": -2.728274345397949, "rewards_train/1-w": 2.600710868835449, "rewards_train/2-2": 2.828166961669922, "rewards_train/2-w": -1.7701106071472168, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.328985214233398, "rewards_train/margins_1": 4.574349403381348, "rewards_train/margins_2": 4.598277568817139, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -190.0673828125, "logps_train/policy_1_l": -191.59146118164062, "logps_train/policy_1_w": -132.85311889648438, "logps_train/policy_2_2": -127.33960723876953, "logps_train/policy_2_w": -205.02993774414062, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -1.7145507335662842, "rewards_train/1-l": -1.8407868146896362, "rewards_train/1-w": 3.3248448371887207, "rewards_train/2-2": 1.8781492710113525, "rewards_train/2-w": -0.9979153871536255, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.165631651878357, "rewards_train/margins_1": 5.039395570755005, "rewards_train/margins_2": 2.876064658164978, "step": 307 }, { "epoch": 0.92, "logps_train/policy_1_2": -125.59081268310547, "logps_train/policy_1_l": -89.49140930175781, "logps_train/policy_1_w": -77.13658142089844, "logps_train/policy_2_2": -77.76144409179688, "logps_train/policy_2_w": -116.43386840820312, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": -1.162010669708252, "rewards_train/1-l": -0.8840292692184448, "rewards_train/1-w": 1.3914196491241455, "rewards_train/2-2": 1.9244418144226074, "rewards_train/2-w": -0.6986608505249023, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.2754489183425903, "rewards_train/margins_1": 2.5534303188323975, "rewards_train/margins_2": 2.6231026649475098, "step": 307 }, { "epoch": 0.92, "learning_rate": 3.0390894672914427e-06, "loss": 0.862, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -175.50616455078125, "logps_train/policy_1_l": -149.017578125, "logps_train/policy_1_w": -107.9320068359375, "logps_train/policy_2_2": -118.4968490600586, "logps_train/policy_2_w": -147.15530395507812, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.2978827953338623, "rewards_train/1-l": -1.5280272960662842, "rewards_train/1-w": 1.719983458518982, "rewards_train/2-2": 2.2573466300964355, "rewards_train/2-w": -0.43545278906822205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.248010754585266, "rewards_train/margins_1": 3.0178662538528442, "rewards_train/margins_2": 2.6927994191646576, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -202.46743774414062, "logps_train/policy_1_l": -169.59271240234375, "logps_train/policy_1_w": -135.5215301513672, "logps_train/policy_2_2": -148.38336181640625, "logps_train/policy_2_w": -178.5050506591797, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.125845193862915, "rewards_train/1-l": -1.690398931503296, "rewards_train/1-w": 2.285444736480713, "rewards_train/2-2": 2.4378347396850586, "rewards_train/2-w": -0.21613028645515442, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.975843667984009, "rewards_train/margins_1": 3.411289930343628, "rewards_train/margins_2": 2.653965026140213, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -230.6491241455078, "logps_train/policy_1_l": -187.05776977539062, "logps_train/policy_1_w": -151.50270080566406, "logps_train/policy_2_2": -160.43968200683594, "logps_train/policy_2_w": -213.14898681640625, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -0.7227238416671753, "rewards_train/1-l": -2.087904691696167, "rewards_train/1-w": 2.9788317680358887, "rewards_train/2-2": 3.263063430786133, "rewards_train/2-w": -0.6000561118125916, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.066736459732056, "rewards_train/margins_1": 3.701555609703064, "rewards_train/margins_2": 3.8631195425987244, "step": 308 }, { "epoch": 0.92, "logps_train/policy_1_2": -241.61305236816406, "logps_train/policy_1_l": -200.2239227294922, "logps_train/policy_1_w": -184.3616943359375, "logps_train/policy_2_2": -170.43023681640625, "logps_train/policy_2_w": -252.94998168945312, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": -1.5769305229187012, "rewards_train/1-l": -1.8682911396026611, "rewards_train/1-w": 2.9212536811828613, "rewards_train/2-2": 3.1339292526245117, "rewards_train/2-w": -1.8231234550476074, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7895448207855225, "rewards_train/margins_1": 4.4981842041015625, "rewards_train/margins_2": 4.957052707672119, "step": 308 }, { "epoch": 0.93, "logps_train/policy_1_2": -216.61138916015625, "logps_train/policy_1_l": -211.734375, "logps_train/policy_1_w": -150.88902282714844, "logps_train/policy_2_2": -143.31869506835938, "logps_train/policy_2_w": -231.2208709716797, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.7435111999511719, "rewards_train/1-l": -1.7646484375, "rewards_train/1-w": 2.2921030521392822, "rewards_train/2-2": 2.543032646179199, "rewards_train/2-w": -2.263298511505127, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.056751489639282, "rewards_train/margins_1": 4.035614252090454, "rewards_train/margins_2": 4.806331157684326, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -169.6429443359375, "logps_train/policy_1_l": -169.80816650390625, "logps_train/policy_1_w": -128.3182373046875, "logps_train/policy_2_2": -110.49202728271484, "logps_train/policy_2_w": -191.9595489501953, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.995844304561615, "rewards_train/1-l": -1.5390512943267822, "rewards_train/1-w": 2.347522735595703, "rewards_train/2-2": 2.5809545516967773, "rewards_train/2-w": -1.3173422813415527, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8865740299224854, "rewards_train/margins_1": 3.343367040157318, "rewards_train/margins_2": 3.89829683303833, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -219.0057830810547, "logps_train/policy_1_l": -188.2763671875, "logps_train/policy_1_w": -142.66241455078125, "logps_train/policy_2_2": -154.6062774658203, "logps_train/policy_2_w": -212.31918334960938, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.3531173467636108, "rewards_train/1-l": -1.9287116527557373, "rewards_train/1-w": 3.0927443504333496, "rewards_train/2-2": 2.851872205734253, "rewards_train/2-w": -1.0217617750167847, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.021456003189087, "rewards_train/margins_1": 4.4458616971969604, "rewards_train/margins_2": 3.8736339807510376, "step": 309 }, { "epoch": 0.93, "logps_train/policy_1_2": -99.4146957397461, "logps_train/policy_1_l": -97.29705810546875, "logps_train/policy_1_w": -84.09178161621094, "logps_train/policy_2_2": -68.93064880371094, "logps_train/policy_2_w": -139.45147705078125, "logps_train/ref_1_2": -91.5, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -79.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": -0.7842429280281067, "rewards_train/1-l": -1.2462835311889648, "rewards_train/1-w": 1.773439645767212, "rewards_train/2-2": 1.0149425268173218, "rewards_train/2-w": -1.6646791696548462, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.0197231769561768, "rewards_train/margins_1": 2.5576825737953186, "rewards_train/margins_2": 2.679621696472168, "step": 309 }, { "epoch": 0.93, "learning_rate": 3.014946546852746e-06, "loss": 0.9459, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -222.39141845703125, "logps_train/policy_1_l": -208.62274169921875, "logps_train/policy_1_w": -133.71925354003906, "logps_train/policy_2_2": -147.244384765625, "logps_train/policy_2_w": -207.70736694335938, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -1.481329083442688, "rewards_train/1-l": -2.693915605545044, "rewards_train/1-w": 2.4628405570983887, "rewards_train/2-2": 2.8833727836608887, "rewards_train/2-w": -1.8969078063964844, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.156756162643433, "rewards_train/margins_1": 3.9441696405410767, "rewards_train/margins_2": 4.780280590057373, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -156.5992431640625, "logps_train/policy_1_l": -217.75570678710938, "logps_train/policy_1_w": -115.68074798583984, "logps_train/policy_2_2": -111.94153594970703, "logps_train/policy_2_w": -166.06564331054688, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.5982043743133545, "rewards_train/1-l": -2.913071632385254, "rewards_train/1-w": 1.9405193328857422, "rewards_train/2-2": 2.1163930892944336, "rewards_train/2-w": -0.9264864325523376, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.625, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.853590965270996, "rewards_train/margins_1": 2.5387237071990967, "rewards_train/margins_2": 3.0428795218467712, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -198.87826538085938, "logps_train/policy_1_l": -179.8119659423828, "logps_train/policy_1_w": -165.96823120117188, "logps_train/policy_2_2": -134.68128967285156, "logps_train/policy_2_w": -220.8040008544922, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -1.8124358654022217, "rewards_train/1-l": -1.6186962127685547, "rewards_train/1-w": 2.5199737548828125, "rewards_train/2-2": 2.454526901245117, "rewards_train/2-w": -1.0264941453933716, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.138669967651367, "rewards_train/margins_1": 4.332409620285034, "rewards_train/margins_2": 3.4810210466384888, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -190.0849609375, "logps_train/policy_1_l": -203.6287841796875, "logps_train/policy_1_w": -161.0382537841797, "logps_train/policy_2_2": -143.84420776367188, "logps_train/policy_2_w": -229.32595825195312, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -0.381153404712677, "rewards_train/1-l": -2.1564807891845703, "rewards_train/1-w": 3.2608227729797363, "rewards_train/2-2": 2.6108908653259277, "rewards_train/2-w": -0.8570096492767334, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.417303562164307, "rewards_train/margins_1": 3.6419761776924133, "rewards_train/margins_2": 3.467900514602661, "step": 310 }, { "epoch": 0.93, "logps_train/policy_1_2": -175.8544158935547, "logps_train/policy_1_l": -129.99461364746094, "logps_train/policy_1_w": -79.34732055664062, "logps_train/policy_2_2": -110.28645324707031, "logps_train/policy_2_w": -130.98997497558594, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -1.5705982446670532, "rewards_train/1-l": -1.2539536952972412, "rewards_train/1-w": 1.6692713499069214, "rewards_train/2-2": 2.404557704925537, "rewards_train/2-w": -1.5743881464004517, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.9232250452041626, "rewards_train/margins_1": 3.2398695945739746, "rewards_train/margins_2": 3.9789458513259888, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -160.97885131835938, "logps_train/policy_1_l": -200.40870666503906, "logps_train/policy_1_w": -150.35403442382812, "logps_train/policy_2_2": -113.94092559814453, "logps_train/policy_2_w": -224.03179931640625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -0.1353854387998581, "rewards_train/1-l": -1.6005384922027588, "rewards_train/1-w": 2.714204788208008, "rewards_train/2-2": 2.500828742980957, "rewards_train/2-w": -1.7078683376312256, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.314743280410767, "rewards_train/margins_1": 2.849590227007866, "rewards_train/margins_2": 4.208697080612183, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -178.38555908203125, "logps_train/policy_1_l": -180.59432983398438, "logps_train/policy_1_w": -154.02537536621094, "logps_train/policy_2_2": -121.08539581298828, "logps_train/policy_2_w": -227.327392578125, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": -1.3787899017333984, "rewards_train/1-l": -2.5707616806030273, "rewards_train/1-w": 2.9634780883789062, "rewards_train/2-2": 2.036431312561035, "rewards_train/2-w": -1.2112551927566528, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.534239768981934, "rewards_train/margins_1": 4.342267990112305, "rewards_train/margins_2": 3.247686505317688, "step": 311 }, { "epoch": 0.93, "logps_train/policy_1_2": -181.19358825683594, "logps_train/policy_1_l": -144.62295532226562, "logps_train/policy_1_w": -123.59573364257812, "logps_train/policy_2_2": -128.97567749023438, "logps_train/policy_2_w": -178.28201293945312, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.6088124513626099, "rewards_train/1-l": -2.165079116821289, "rewards_train/1-w": 2.530514717102051, "rewards_train/2-2": 2.5778231620788574, "rewards_train/2-w": -0.9597695469856262, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.69559383392334, "rewards_train/margins_1": 3.1393271684646606, "rewards_train/margins_2": 3.5375927090644836, "step": 311 }, { "epoch": 0.93, "learning_rate": 2.990753368538872e-06, "loss": 0.8644, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -182.06610107421875, "logps_train/policy_1_l": -171.32839965820312, "logps_train/policy_1_w": -104.80967712402344, "logps_train/policy_2_2": -110.89762115478516, "logps_train/policy_2_w": -157.24234008789062, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.3085640668869019, "rewards_train/1-l": -2.317018747329712, "rewards_train/1-w": 2.1776747703552246, "rewards_train/2-2": 2.7970662117004395, "rewards_train/2-w": -1.0125148296356201, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.4946935176849365, "rewards_train/margins_1": 3.4862388372421265, "rewards_train/margins_2": 3.8095810413360596, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -181.9605712890625, "logps_train/policy_1_l": -165.0253143310547, "logps_train/policy_1_w": -139.22344970703125, "logps_train/policy_2_2": -134.3592529296875, "logps_train/policy_2_w": -200.5415496826172, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.6066032648086548, "rewards_train/1-l": -1.7515544891357422, "rewards_train/1-w": 2.8057796955108643, "rewards_train/2-2": 2.314075469970703, "rewards_train/2-w": -1.473686695098877, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5573341846466064, "rewards_train/margins_1": 3.412382960319519, "rewards_train/margins_2": 3.78776216506958, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -226.9684600830078, "logps_train/policy_1_l": -170.9005889892578, "logps_train/policy_1_w": -124.96367645263672, "logps_train/policy_2_2": -160.42852783203125, "logps_train/policy_2_w": -175.12741088867188, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.7765324115753174, "rewards_train/1-l": -1.5390819311141968, "rewards_train/1-w": 2.4766793251037598, "rewards_train/2-2": 2.6419124603271484, "rewards_train/2-w": -0.5232880115509033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.0157612562179565, "rewards_train/margins_1": 4.253211736679077, "rewards_train/margins_2": 3.1652004718780518, "step": 312 }, { "epoch": 0.93, "logps_train/policy_1_2": -222.25778198242188, "logps_train/policy_1_l": -186.40701293945312, "logps_train/policy_1_w": -122.90392303466797, "logps_train/policy_2_2": -143.47824096679688, "logps_train/policy_2_w": -196.86216735839844, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -2.1585917472839355, "rewards_train/1-l": -2.0565218925476074, "rewards_train/1-w": 2.9548230171203613, "rewards_train/2-2": 2.8838157653808594, "rewards_train/2-w": -1.5119000673294067, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.011344909667969, "rewards_train/margins_1": 5.113414764404297, "rewards_train/margins_2": 4.395715832710266, "step": 312 }, { "epoch": 0.94, "logps_train/policy_1_2": -174.01605224609375, "logps_train/policy_1_l": -175.05084228515625, "logps_train/policy_1_w": -151.5513916015625, "logps_train/policy_2_2": -111.01991271972656, "logps_train/policy_2_w": -239.50381469726562, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -1.3719186782836914, "rewards_train/1-l": -1.4798893928527832, "rewards_train/1-w": 3.49798583984375, "rewards_train/2-2": 2.180577278137207, "rewards_train/2-w": -1.99491286277771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.977875232696533, "rewards_train/margins_1": 4.869904518127441, "rewards_train/margins_2": 4.175490140914917, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -113.5071029663086, "logps_train/policy_1_l": -127.42350006103516, "logps_train/policy_1_w": -156.10159301757812, "logps_train/policy_2_2": -71.01668548583984, "logps_train/policy_2_w": -234.4927978515625, "logps_train/ref_1_2": -102.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -1.1307885646820068, "rewards_train/1-l": -1.615494728088379, "rewards_train/1-w": 2.803121566772461, "rewards_train/2-2": 1.2858316898345947, "rewards_train/2-w": -2.174574136734009, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.41861629486084, "rewards_train/margins_1": 3.9339101314544678, "rewards_train/margins_2": 3.4604058265686035, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -132.11019897460938, "logps_train/policy_1_l": -135.8831787109375, "logps_train/policy_1_w": -79.69857788085938, "logps_train/policy_2_2": -78.88665771484375, "logps_train/policy_2_w": -127.79918670654297, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": -1.9745385646820068, "rewards_train/1-l": -1.6951963901519775, "rewards_train/1-w": 1.7621310949325562, "rewards_train/2-2": 1.5975948572158813, "rewards_train/2-w": -1.1198303699493408, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.4573274850845337, "rewards_train/margins_1": 3.736669659614563, "rewards_train/margins_2": 2.717425227165222, "step": 313 }, { "epoch": 0.94, "logps_train/policy_1_2": -144.15756225585938, "logps_train/policy_1_l": -122.23887634277344, "logps_train/policy_1_w": -76.09358215332031, "logps_train/policy_2_2": -86.23865509033203, "logps_train/policy_2_w": -128.23403930664062, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": -1.075521469116211, "rewards_train/1-l": -1.3973249197006226, "rewards_train/1-w": 1.9455249309539795, "rewards_train/2-2": 2.30660343170166, "rewards_train/2-w": -0.8804359436035156, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.342849850654602, "rewards_train/margins_1": 3.0210464000701904, "rewards_train/margins_2": 3.187039375305176, "step": 313 }, { "epoch": 0.94, "learning_rate": 2.9665122935613727e-06, "loss": 0.8747, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -99.97772216796875, "logps_train/policy_1_l": -131.64410400390625, "logps_train/policy_1_w": -82.30641174316406, "logps_train/policy_2_2": -64.75701141357422, "logps_train/policy_2_w": -123.93773651123047, "logps_train/ref_1_2": -93.5, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": -0.6665216684341431, "rewards_train/1-l": -2.079106330871582, "rewards_train/1-w": 1.6233636140823364, "rewards_train/2-2": 1.3803534507751465, "rewards_train/2-w": -1.0119376182556152, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7024699449539185, "rewards_train/margins_1": 2.2898852825164795, "rewards_train/margins_2": 2.3922910690307617, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -128.14076232910156, "logps_train/policy_1_l": -106.66812133789062, "logps_train/policy_1_w": -85.39694213867188, "logps_train/policy_2_2": -81.10108947753906, "logps_train/policy_2_w": -120.89591217041016, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": -1.0867327451705933, "rewards_train/1-l": -1.3066558837890625, "rewards_train/1-w": 1.211087703704834, "rewards_train/2-2": 1.8781726360321045, "rewards_train/2-w": -0.9817789196968079, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.5177435874938965, "rewards_train/margins_1": 2.2978204488754272, "rewards_train/margins_2": 2.8599515557289124, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -177.33782958984375, "logps_train/policy_1_l": -158.63333129882812, "logps_train/policy_1_w": -92.04478454589844, "logps_train/policy_2_2": -120.91056823730469, "logps_train/policy_2_w": -125.10018920898438, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": -1.4605411291122437, "rewards_train/1-l": -1.2723652124404907, "rewards_train/1-w": 1.1377089023590088, "rewards_train/2-2": 2.1276934146881104, "rewards_train/2-w": -0.6166583299636841, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 2.4100741147994995, "rewards_train/margins_1": 2.5982500314712524, "rewards_train/margins_2": 2.7443517446517944, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -176.4165802001953, "logps_train/policy_1_l": -139.64035034179688, "logps_train/policy_1_w": -145.95492553710938, "logps_train/policy_2_2": -118.31869506835938, "logps_train/policy_2_w": -225.4225616455078, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.4701733589172363, "rewards_train/1-l": -2.1984100341796875, "rewards_train/1-w": 3.2849764823913574, "rewards_train/2-2": 2.3384430408477783, "rewards_train/2-w": -1.5735070705413818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.483386516571045, "rewards_train/margins_1": 4.755149841308594, "rewards_train/margins_2": 3.91195011138916, "step": 314 }, { "epoch": 0.94, "logps_train/policy_1_2": -77.1746826171875, "logps_train/policy_1_l": -56.91062927246094, "logps_train/policy_1_w": -70.75352478027344, "logps_train/policy_2_2": -46.31326675415039, "logps_train/policy_2_w": -114.63380432128906, "logps_train/ref_1_2": -72.0, "logps_train/ref_1_l": -48.75, "logps_train/ref_1_w": -87.5, "logps_train/ref_2_2": -59.25, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": -0.5342652797698975, "rewards_train/1-l": -0.8308092355728149, "rewards_train/1-w": 1.6715219020843506, "rewards_train/2-2": 1.2923061847686768, "rewards_train/2-w": -0.9165046215057373, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.5023311376571655, "rewards_train/margins_1": 2.205787181854248, "rewards_train/margins_2": 2.208810806274414, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -207.661865234375, "logps_train/policy_1_l": -155.00851440429688, "logps_train/policy_1_w": -121.22831726074219, "logps_train/policy_2_2": -137.23507690429688, "logps_train/policy_2_w": -191.39532470703125, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -2.124390125274658, "rewards_train/1-l": -1.5057344436645508, "rewards_train/1-w": 2.6740431785583496, "rewards_train/2-2": 2.458134174346924, "rewards_train/2-w": -1.4864085912704468, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1797776222229, "rewards_train/margins_1": 4.798433303833008, "rewards_train/margins_2": 3.9445427656173706, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -242.5487060546875, "logps_train/policy_1_l": -178.81607055664062, "logps_train/policy_1_w": -81.03570556640625, "logps_train/policy_2_2": -175.03890991210938, "logps_train/policy_2_w": -118.78033447265625, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -96.5, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -113.5, "rewards_train/1-2": -1.8611209392547607, "rewards_train/1-l": -2.1856117248535156, "rewards_train/1-w": 1.52308988571167, "rewards_train/2-2": 2.2521650791168213, "rewards_train/2-w": -0.5440488457679749, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7087016105651855, "rewards_train/margins_1": 3.3842108249664307, "rewards_train/margins_2": 2.796213924884796, "step": 315 }, { "epoch": 0.94, "logps_train/policy_1_2": -156.77757263183594, "logps_train/policy_1_l": -172.0025177001953, "logps_train/policy_1_w": -149.316650390625, "logps_train/policy_2_2": -109.30908966064453, "logps_train/policy_2_w": -213.2015380859375, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -0.6554917097091675, "rewards_train/1-l": -1.7229080200195312, "rewards_train/1-w": 2.3986101150512695, "rewards_train/2-2": 2.0417470932006836, "rewards_train/2-w": -1.3749372959136963, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.121518135070801, "rewards_train/margins_1": 3.054101824760437, "rewards_train/margins_2": 3.41668438911438, "step": 315 }, { "epoch": 0.95, "learning_rate": 2.9422256878064326e-06, "loss": 1.2884, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -273.4039306640625, "logps_train/policy_1_l": -248.7927703857422, "logps_train/policy_1_w": -190.07107543945312, "logps_train/policy_2_2": -181.61965942382812, "logps_train/policy_2_w": -283.29718017578125, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": -2.6646108627319336, "rewards_train/1-l": -2.7158002853393555, "rewards_train/1-w": 3.45070481300354, "rewards_train/2-2": 3.2204556465148926, "rewards_train/2-w": -2.3594045639038086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.1665050983428955, "rewards_train/margins_1": 6.115315675735474, "rewards_train/margins_2": 5.579860210418701, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -188.5040740966797, "logps_train/policy_1_l": -161.1290283203125, "logps_train/policy_1_w": -140.26707458496094, "logps_train/policy_2_2": -122.60548400878906, "logps_train/policy_2_w": -218.72030639648438, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -2.202641248703003, "rewards_train/1-l": -1.7563612461090088, "rewards_train/1-w": 2.8563003540039062, "rewards_train/2-2": 1.7913225889205933, "rewards_train/2-w": -2.479062080383301, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.612661600112915, "rewards_train/margins_1": 5.058941602706909, "rewards_train/margins_2": 4.270384669303894, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -227.55612182617188, "logps_train/policy_1_l": -234.11654663085938, "logps_train/policy_1_w": -143.5431365966797, "logps_train/policy_2_2": -155.27137756347656, "logps_train/policy_2_w": -219.76834106445312, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -1.4618616104125977, "rewards_train/1-l": -2.8194692134857178, "rewards_train/1-w": 3.3097479343414307, "rewards_train/2-2": 2.897080898284912, "rewards_train/2-w": -0.7838646173477173, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.129217147827148, "rewards_train/margins_1": 4.771609544754028, "rewards_train/margins_2": 3.6809455156326294, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -133.77224731445312, "logps_train/policy_1_l": -148.92987060546875, "logps_train/policy_1_w": -92.14593505859375, "logps_train/policy_2_2": -95.14067077636719, "logps_train/policy_2_w": -139.21580505371094, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": 0.28332197666168213, "rewards_train/1-l": -1.3392760753631592, "rewards_train/1-w": 2.2371647357940674, "rewards_train/2-2": 2.448432683944702, "rewards_train/2-w": -0.8239238262176514, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5764408111572266, "rewards_train/margins_1": 1.9538427591323853, "rewards_train/margins_2": 3.2723565101623535, "step": 316 }, { "epoch": 0.95, "logps_train/policy_1_2": -141.82061767578125, "logps_train/policy_1_l": -117.62272644042969, "logps_train/policy_1_w": -92.95852661132812, "logps_train/policy_2_2": -94.49632263183594, "logps_train/policy_2_w": -133.93206787109375, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -0.8437800407409668, "rewards_train/1-l": -1.3467457294464111, "rewards_train/1-w": 1.7740694284439087, "rewards_train/2-2": 2.1712656021118164, "rewards_train/2-w": -0.9059018492698669, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.12081515789032, "rewards_train/margins_1": 2.6178494691848755, "rewards_train/margins_2": 3.0771674513816833, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -208.4019012451172, "logps_train/policy_1_l": -218.81263732910156, "logps_train/policy_1_w": -158.5953369140625, "logps_train/policy_2_2": -133.6732177734375, "logps_train/policy_2_w": -239.5006103515625, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.648002028465271, "rewards_train/1-l": -1.501476764678955, "rewards_train/1-w": 3.0439834594726562, "rewards_train/2-2": 2.5803329944610596, "rewards_train/2-w": -1.3406860828399658, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.545460224151611, "rewards_train/margins_1": 4.691985487937927, "rewards_train/margins_2": 3.9210190773010254, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -173.2123565673828, "logps_train/policy_1_l": -161.54598999023438, "logps_train/policy_1_w": -111.2496566772461, "logps_train/policy_2_2": -120.75511169433594, "logps_train/policy_2_w": -155.38027954101562, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.3313921689987183, "rewards_train/1-l": -2.2268643379211426, "rewards_train/1-w": 1.6508158445358276, "rewards_train/2-2": 1.865407109260559, "rewards_train/2-w": -1.1083400249481201, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.87768018245697, "rewards_train/margins_1": 2.982208013534546, "rewards_train/margins_2": 2.973747134208679, "step": 317 }, { "epoch": 0.95, "logps_train/policy_1_2": -184.38516235351562, "logps_train/policy_1_l": -158.24488830566406, "logps_train/policy_1_w": -144.71966552734375, "logps_train/policy_2_2": -134.66058349609375, "logps_train/policy_2_w": -220.59188842773438, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -0.97757887840271, "rewards_train/1-l": -1.8821065425872803, "rewards_train/1-w": 2.8151440620422363, "rewards_train/2-2": 2.02573823928833, "rewards_train/2-w": -1.754110336303711, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.697250604629517, "rewards_train/margins_1": 3.7927229404449463, "rewards_train/margins_2": 3.779848575592041, "step": 317 }, { "epoch": 0.95, "learning_rate": 2.917895921603958e-06, "loss": 0.8115, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -153.29940795898438, "logps_train/policy_1_l": -121.90753173828125, "logps_train/policy_1_w": -81.87165832519531, "logps_train/policy_2_2": -101.97602081298828, "logps_train/policy_2_w": -118.27982330322266, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": -0.9147070646286011, "rewards_train/1-l": -1.7702456712722778, "rewards_train/1-w": 2.154435873031616, "rewards_train/2-2": 2.088531017303467, "rewards_train/2-w": -0.3726848363876343, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.924681544303894, "rewards_train/margins_1": 3.0691429376602173, "rewards_train/margins_2": 2.461215853691101, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -230.5021209716797, "logps_train/policy_1_l": -203.53347778320312, "logps_train/policy_1_w": -148.75025939941406, "logps_train/policy_2_2": -169.13201904296875, "logps_train/policy_2_w": -215.6187286376953, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.0939611196517944, "rewards_train/1-l": -1.7572542428970337, "rewards_train/1-w": 3.1262450218200684, "rewards_train/2-2": 2.903985023498535, "rewards_train/2-w": -1.089216947555542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.883499264717102, "rewards_train/margins_1": 4.220206141471863, "rewards_train/margins_2": 3.993201971054077, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -167.80795288085938, "logps_train/policy_1_l": -122.78013610839844, "logps_train/policy_1_w": -109.3633804321289, "logps_train/policy_2_2": -119.03056335449219, "logps_train/policy_2_w": -146.5585479736328, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.8503261804580688, "rewards_train/1-l": -1.704576015472412, "rewards_train/1-w": 2.304530620574951, "rewards_train/2-2": 2.3672566413879395, "rewards_train/2-w": 0.09297239780426025, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.009106636047363, "rewards_train/margins_1": 3.15485680103302, "rewards_train/margins_2": 2.274284243583679, "step": 318 }, { "epoch": 0.95, "logps_train/policy_1_2": -187.3468017578125, "logps_train/policy_1_l": -228.2458038330078, "logps_train/policy_1_w": -172.415283203125, "logps_train/policy_2_2": -143.35836791992188, "logps_train/policy_2_w": -233.01507568359375, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": -0.43507149815559387, "rewards_train/1-l": -2.521064519882202, "rewards_train/1-w": 2.723511219024658, "rewards_train/2-2": 2.199270725250244, "rewards_train/2-w": -0.5855898857116699, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.24457573890686, "rewards_train/margins_1": 3.158582717180252, "rewards_train/margins_2": 2.784860610961914, "step": 318 }, { "epoch": 0.96, "logps_train/policy_1_2": -187.70803833007812, "logps_train/policy_1_l": -146.3350830078125, "logps_train/policy_1_w": -115.39087677001953, "logps_train/policy_2_2": -122.56553649902344, "logps_train/policy_2_w": -194.5912628173828, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.539552927017212, "rewards_train/1-l": -1.728478193283081, "rewards_train/1-w": 1.7624739408493042, "rewards_train/2-2": 2.4729390144348145, "rewards_train/2-w": -3.0020956993103027, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.4909521341323853, "rewards_train/margins_1": 3.302026867866516, "rewards_train/margins_2": 5.475034713745117, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -161.11358642578125, "logps_train/policy_1_l": -145.3447723388672, "logps_train/policy_1_w": -117.8170394897461, "logps_train/policy_2_2": -113.64077758789062, "logps_train/policy_2_w": -173.29542541503906, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.787921667098999, "rewards_train/1-l": -1.1405506134033203, "rewards_train/1-w": 1.9536478519439697, "rewards_train/2-2": 2.0312349796295166, "rewards_train/2-w": -1.3512234687805176, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.09419846534729, "rewards_train/margins_1": 2.7415695190429688, "rewards_train/margins_2": 3.382458448410034, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -237.0732421875, "logps_train/policy_1_l": -189.13070678710938, "logps_train/policy_1_w": -133.61131286621094, "logps_train/policy_2_2": -171.78567504882812, "logps_train/policy_2_w": -185.44036865234375, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -1.529589056968689, "rewards_train/1-l": -2.5654141902923584, "rewards_train/1-w": 4.501368522644043, "rewards_train/2-2": 2.6155741214752197, "rewards_train/2-w": 1.37959623336792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 7.066782712936401, "rewards_train/margins_1": 6.030957579612732, "rewards_train/margins_2": 1.2359778881072998, "step": 319 }, { "epoch": 0.96, "logps_train/policy_1_2": -194.79222106933594, "logps_train/policy_1_l": -227.43463134765625, "logps_train/policy_1_w": -133.6814727783203, "logps_train/policy_2_2": -122.412841796875, "logps_train/policy_2_w": -211.49188232421875, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.5811753273010254, "rewards_train/1-l": -1.977693796157837, "rewards_train/1-w": 3.214665412902832, "rewards_train/2-2": 2.4520747661590576, "rewards_train/2-w": -1.3710637092590332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.192359209060669, "rewards_train/margins_1": 4.795840740203857, "rewards_train/margins_2": 3.823138475418091, "step": 319 }, { "epoch": 0.96, "learning_rate": 2.8935253694962414e-06, "loss": 1.3277, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -235.28256225585938, "logps_train/policy_1_l": -213.7036590576172, "logps_train/policy_1_w": -212.79595947265625, "logps_train/policy_2_2": -166.37127685546875, "logps_train/policy_2_w": -293.35015869140625, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -250.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -278.0, "rewards_train/1-2": -0.5954452157020569, "rewards_train/1-l": -1.9725148677825928, "rewards_train/1-w": 3.727630615234375, "rewards_train/2-2": 3.24998140335083, "rewards_train/2-w": -1.5639230012893677, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.700145483016968, "rewards_train/margins_1": 4.323075830936432, "rewards_train/margins_2": 4.813904404640198, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -156.2182159423828, "logps_train/policy_1_l": -203.72589111328125, "logps_train/policy_1_w": -104.59834289550781, "logps_train/policy_2_2": -110.62677001953125, "logps_train/policy_2_w": -158.2057647705078, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -0.28158703446388245, "rewards_train/1-l": -2.210479974746704, "rewards_train/1-w": 2.0772740840911865, "rewards_train/2-2": 2.4703307151794434, "rewards_train/2-w": -0.9600297808647156, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.287754058837891, "rewards_train/margins_1": 2.358861118555069, "rewards_train/margins_2": 3.430360496044159, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -210.8223876953125, "logps_train/policy_1_l": -206.91012573242188, "logps_train/policy_1_w": -119.73966979980469, "logps_train/policy_2_2": -138.54632568359375, "logps_train/policy_2_w": -181.165771484375, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.9970828294754028, "rewards_train/1-l": -2.6007790565490723, "rewards_train/1-w": 2.471735715866089, "rewards_train/2-2": 2.6063055992126465, "rewards_train/2-w": -1.2896238565444946, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.072514772415161, "rewards_train/margins_1": 4.468818545341492, "rewards_train/margins_2": 3.895929455757141, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -115.47247314453125, "logps_train/policy_1_l": -131.14410400390625, "logps_train/policy_1_w": -94.62232971191406, "logps_train/policy_2_2": -78.20954895019531, "logps_train/policy_2_w": -141.7797393798828, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -93.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -0.5925598740577698, "rewards_train/1-l": -1.968732476234436, "rewards_train/1-w": 2.027024745941162, "rewards_train/2-2": 1.5606855154037476, "rewards_train/2-w": -0.9047328233718872, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.995757222175598, "rewards_train/margins_1": 2.619584619998932, "rewards_train/margins_2": 2.4654183387756348, "step": 320 }, { "epoch": 0.96, "logps_train/policy_1_2": -261.34442138671875, "logps_train/policy_1_l": -212.01174926757812, "logps_train/policy_1_w": -133.11314392089844, "logps_train/policy_2_2": -166.32766723632812, "logps_train/policy_2_w": -193.0983428955078, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.9660835266113281, "rewards_train/1-l": -2.264554262161255, "rewards_train/1-w": 2.4293105602264404, "rewards_train/2-2": 4.069870471954346, "rewards_train/2-w": -1.0485057830810547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.693864822387695, "rewards_train/margins_1": 4.3953940868377686, "rewards_train/margins_2": 5.1183762550354, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -195.40841674804688, "logps_train/policy_1_l": -154.19677734375, "logps_train/policy_1_w": -128.4358367919922, "logps_train/policy_2_2": -126.10477447509766, "logps_train/policy_2_w": -202.54678344726562, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -1.489083170890808, "rewards_train/1-l": -1.1077630519866943, "rewards_train/1-w": 2.940791130065918, "rewards_train/2-2": 2.449873924255371, "rewards_train/2-w": -1.0953028202056885, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.048554182052612, "rewards_train/margins_1": 4.429874300956726, "rewards_train/margins_2": 3.5451767444610596, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -222.55203247070312, "logps_train/policy_1_l": -171.77490234375, "logps_train/policy_1_w": -128.8640899658203, "logps_train/policy_2_2": -147.7945098876953, "logps_train/policy_2_w": -188.09051513671875, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.3315691947937012, "rewards_train/1-l": -1.4134286642074585, "rewards_train/1-w": 2.182731866836548, "rewards_train/2-2": 2.9175219535827637, "rewards_train/2-w": -1.42643404006958, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5961605310440063, "rewards_train/margins_1": 3.514301061630249, "rewards_train/margins_2": 4.343955993652344, "step": 321 }, { "epoch": 0.96, "logps_train/policy_1_2": -184.89703369140625, "logps_train/policy_1_l": -133.90623474121094, "logps_train/policy_1_w": -112.50953674316406, "logps_train/policy_2_2": -126.11094665527344, "logps_train/policy_2_w": -170.00216674804688, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.1225180625915527, "rewards_train/1-l": -1.477146863937378, "rewards_train/1-w": 2.4855704307556152, "rewards_train/2-2": 2.5638070106506348, "rewards_train/2-w": -0.8929915428161621, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.962717294692993, "rewards_train/margins_1": 3.608088493347168, "rewards_train/margins_2": 3.456798553466797, "step": 321 }, { "epoch": 0.96, "learning_rate": 2.8691164100062035e-06, "loss": 0.8146, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -177.8917999267578, "logps_train/policy_1_l": -143.0275115966797, "logps_train/policy_1_w": -112.97898864746094, "logps_train/policy_2_2": -118.05174255371094, "logps_train/policy_2_w": -171.2618408203125, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.9262899160385132, "rewards_train/1-l": -1.5455245971679688, "rewards_train/1-w": 2.071436882019043, "rewards_train/2-2": 2.794825792312622, "rewards_train/2-w": -1.4062621593475342, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.6169614791870117, "rewards_train/margins_1": 2.997726798057556, "rewards_train/margins_2": 4.201087951660156, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -93.85884857177734, "logps_train/policy_1_l": -83.18328857421875, "logps_train/policy_1_w": -64.92668151855469, "logps_train/policy_2_2": -53.79917907714844, "logps_train/policy_2_w": -111.88554382324219, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": -0.8757283687591553, "rewards_train/1-l": -1.2154967784881592, "rewards_train/1-w": 1.8534258604049683, "rewards_train/2-2": 1.487269401550293, "rewards_train/2-w": -0.9631637930870056, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.0689226388931274, "rewards_train/margins_1": 2.7291542291641235, "rewards_train/margins_2": 2.4504331946372986, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -212.15164184570312, "logps_train/policy_1_l": -188.99618530273438, "logps_train/policy_1_w": -99.34115600585938, "logps_train/policy_2_2": -155.30844116210938, "logps_train/policy_2_w": -148.263916015625, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.343679666519165, "rewards_train/1-l": -1.9341899156570435, "rewards_train/1-w": 1.9904934167861938, "rewards_train/2-2": 2.3850746154785156, "rewards_train/2-w": -0.87912517786026, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 3.9246833324432373, "rewards_train/margins_1": 3.334173083305359, "rewards_train/margins_2": 3.2641997933387756, "step": 322 }, { "epoch": 0.96, "logps_train/policy_1_2": -174.2705078125, "logps_train/policy_1_l": -162.13583374023438, "logps_train/policy_1_w": -124.84365844726562, "logps_train/policy_2_2": -110.3149185180664, "logps_train/policy_2_w": -200.91775512695312, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -1.0383789539337158, "rewards_train/1-l": -1.700045108795166, "rewards_train/1-w": 2.6765480041503906, "rewards_train/2-2": 2.4700701236724854, "rewards_train/2-w": -1.788453459739685, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.376593112945557, "rewards_train/margins_1": 3.7149269580841064, "rewards_train/margins_2": 4.25852358341217, "step": 322 }, { "epoch": 0.97, "logps_train/policy_1_2": -185.18313598632812, "logps_train/policy_1_l": -185.84339904785156, "logps_train/policy_1_w": -121.21810913085938, "logps_train/policy_2_2": -143.79611206054688, "logps_train/policy_2_w": -168.64950561523438, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.0011261031031608582, "rewards_train/1-l": -1.7546526193618774, "rewards_train/1-w": 2.7912755012512207, "rewards_train/2-2": 2.4375762939453125, "rewards_train/2-w": 0.18426810204982758, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.545928120613098, "rewards_train/margins_1": 2.7924016043543816, "rewards_train/margins_2": 2.253308191895485, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -245.09732055664062, "logps_train/policy_1_l": -219.6390380859375, "logps_train/policy_1_w": -166.04537963867188, "logps_train/policy_2_2": -172.8189697265625, "logps_train/policy_2_w": -238.10618591308594, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.7827787399291992, "rewards_train/1-l": -2.11751651763916, "rewards_train/1-w": 2.848489284515381, "rewards_train/2-2": 2.94193172454834, "rewards_train/2-w": -1.2467516660690308, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.966005802154541, "rewards_train/margins_1": 4.63126802444458, "rewards_train/margins_2": 4.188683390617371, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -225.98793029785156, "logps_train/policy_1_l": -191.26040649414062, "logps_train/policy_1_w": -128.38116455078125, "logps_train/policy_2_2": -151.32650756835938, "logps_train/policy_2_w": -203.71746826171875, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.931605339050293, "rewards_train/1-l": -1.2016264200210571, "rewards_train/1-w": 2.5150091648101807, "rewards_train/2-2": 2.998990535736084, "rewards_train/2-w": -1.738933801651001, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.716635584831238, "rewards_train/margins_1": 4.446614503860474, "rewards_train/margins_2": 4.737924337387085, "step": 323 }, { "epoch": 0.97, "logps_train/policy_1_2": -185.34104919433594, "logps_train/policy_1_l": -166.20948791503906, "logps_train/policy_1_w": -138.7761993408203, "logps_train/policy_2_2": -121.14080810546875, "logps_train/policy_2_w": -203.17922973632812, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.7716050148010254, "rewards_train/1-l": -1.6829488277435303, "rewards_train/1-w": 2.3739547729492188, "rewards_train/2-2": 2.4847474098205566, "rewards_train/2-w": -1.5402500629425049, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.056903600692749, "rewards_train/margins_1": 4.145559787750244, "rewards_train/margins_2": 4.0249974727630615, "step": 323 }, { "epoch": 0.97, "learning_rate": 2.8446714254052617e-06, "loss": 0.8715, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -211.61593627929688, "logps_train/policy_1_l": -170.20848083496094, "logps_train/policy_1_w": -122.65253448486328, "logps_train/policy_2_2": -156.43063354492188, "logps_train/policy_2_w": -177.5910186767578, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.7649139761924744, "rewards_train/1-l": -1.6716288328170776, "rewards_train/1-w": 2.2081832885742188, "rewards_train/2-2": 2.3525424003601074, "rewards_train/2-w": -1.179805040359497, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8798121213912964, "rewards_train/margins_1": 2.973097264766693, "rewards_train/margins_2": 3.5323474407196045, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -169.4730682373047, "logps_train/policy_1_l": -175.7974090576172, "logps_train/policy_1_w": -149.33084106445312, "logps_train/policy_2_2": -111.51194763183594, "logps_train/policy_2_w": -214.40054321289062, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -0.8916429281234741, "rewards_train/1-l": -1.9155317544937134, "rewards_train/1-w": 2.8721399307250977, "rewards_train/2-2": 2.206031560897827, "rewards_train/2-w": -1.0842912197113037, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.787671685218811, "rewards_train/margins_1": 3.7637828588485718, "rewards_train/margins_2": 3.290322780609131, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -178.8974151611328, "logps_train/policy_1_l": -208.2960205078125, "logps_train/policy_1_w": -113.48430633544922, "logps_train/policy_2_2": -117.96395874023438, "logps_train/policy_2_w": -175.85467529296875, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.4241173267364502, "rewards_train/1-l": -1.907531499862671, "rewards_train/1-w": 1.9145091772079468, "rewards_train/2-2": 2.197744846343994, "rewards_train/2-w": -1.5265800952911377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8220406770706177, "rewards_train/margins_1": 3.338626503944397, "rewards_train/margins_2": 3.724324941635132, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -187.61720275878906, "logps_train/policy_1_l": -177.35113525390625, "logps_train/policy_1_w": -140.60897827148438, "logps_train/policy_2_2": -133.97622680664062, "logps_train/policy_2_w": -202.72198486328125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.41923993825912476, "rewards_train/1-l": -1.5175102949142456, "rewards_train/1-w": 2.94193434715271, "rewards_train/2-2": 2.69866681098938, "rewards_train/2-w": -1.2019834518432617, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.459444642066956, "rewards_train/margins_1": 3.3611742854118347, "rewards_train/margins_2": 3.9006502628326416, "step": 324 }, { "epoch": 0.97, "logps_train/policy_1_2": -171.81631469726562, "logps_train/policy_1_l": -163.49819946289062, "logps_train/policy_1_w": -124.40022277832031, "logps_train/policy_2_2": -108.95993041992188, "logps_train/policy_2_w": -189.71481323242188, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.577724814414978, "rewards_train/1-l": -1.651969313621521, "rewards_train/1-w": 2.8502116203308105, "rewards_train/2-2": 2.384085178375244, "rewards_train/2-w": -1.1851532459259033, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.5021809339523315, "rewards_train/margins_1": 4.427936434745789, "rewards_train/margins_2": 3.5692384243011475, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -231.03453063964844, "logps_train/policy_1_l": -203.53619384765625, "logps_train/policy_1_w": -157.57955932617188, "logps_train/policy_2_2": -156.2674102783203, "logps_train/policy_2_w": -234.4595947265625, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -1.8790148496627808, "rewards_train/1-l": -2.078399658203125, "rewards_train/1-w": 2.567824602127075, "rewards_train/2-2": 2.7133469581604004, "rewards_train/2-w": -1.8483039140701294, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.6462242603302, "rewards_train/margins_1": 4.446839451789856, "rewards_train/margins_2": 4.56165087223053, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -239.69232177734375, "logps_train/policy_1_l": -159.71743774414062, "logps_train/policy_1_w": -149.55128479003906, "logps_train/policy_2_2": -161.73117065429688, "logps_train/policy_2_w": -221.54946899414062, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -2.2557547092437744, "rewards_train/1-l": -1.961147427558899, "rewards_train/1-w": 3.6464338302612305, "rewards_train/2-2": 2.789088487625122, "rewards_train/2-w": -0.8502596020698547, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.607581257820129, "rewards_train/margins_1": 5.902188539505005, "rewards_train/margins_2": 3.639348089694977, "step": 325 }, { "epoch": 0.97, "logps_train/policy_1_2": -197.64938354492188, "logps_train/policy_1_l": -181.09127807617188, "logps_train/policy_1_w": -130.20492553710938, "logps_train/policy_2_2": -132.6805419921875, "logps_train/policy_2_w": -196.24630737304688, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.2446755170822144, "rewards_train/1-l": -1.7872517108917236, "rewards_train/1-w": 3.254507064819336, "rewards_train/2-2": 2.4753165245056152, "rewards_train/2-w": -1.0668177604675293, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.04175877571106, "rewards_train/margins_1": 4.49918258190155, "rewards_train/margins_2": 3.5421342849731445, "step": 325 }, { "epoch": 0.98, "learning_rate": 2.820192801480817e-06, "loss": 0.8929, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -156.76060485839844, "logps_train/policy_1_l": -140.8810577392578, "logps_train/policy_1_w": -89.46569061279297, "logps_train/policy_2_2": -105.10667419433594, "logps_train/policy_2_w": -129.867919921875, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -0.9215685129165649, "rewards_train/1-l": -1.2998732328414917, "rewards_train/1-w": 1.7888312339782715, "rewards_train/2-2": 2.019166946411133, "rewards_train/2-w": -0.4808233976364136, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.088704466819763, "rewards_train/margins_1": 2.7103997468948364, "rewards_train/margins_2": 2.4999903440475464, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -142.6761016845703, "logps_train/policy_1_l": -146.14370727539062, "logps_train/policy_1_w": -91.64397430419922, "logps_train/policy_2_2": -95.38888549804688, "logps_train/policy_2_w": -132.70977783203125, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -1.3023760318756104, "rewards_train/1-l": -1.5677895545959473, "rewards_train/1-w": 1.8451731204986572, "rewards_train/2-2": 1.8204864263534546, "rewards_train/2-w": -0.714728832244873, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.4129626750946045, "rewards_train/margins_1": 3.1475491523742676, "rewards_train/margins_2": 2.5352152585983276, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -165.4741668701172, "logps_train/policy_1_l": -107.06495666503906, "logps_train/policy_1_w": -85.85519409179688, "logps_train/policy_2_2": -121.28514862060547, "logps_train/policy_2_w": -123.56755828857422, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": -0.4491739869117737, "rewards_train/1-l": -0.8146981000900269, "rewards_train/1-w": 1.6102807521820068, "rewards_train/2-2": 2.095118522644043, "rewards_train/2-w": -0.662224292755127, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 2.4249788522720337, "rewards_train/margins_1": 2.0594547390937805, "rewards_train/margins_2": 2.75734281539917, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -165.80728149414062, "logps_train/policy_1_l": -159.04669189453125, "logps_train/policy_1_w": -132.14263916015625, "logps_train/policy_2_2": -110.87385559082031, "logps_train/policy_2_w": -192.0126190185547, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.0322911739349365, "rewards_train/1-l": -1.3952940702438354, "rewards_train/1-w": 3.0169849395751953, "rewards_train/2-2": 1.909489631652832, "rewards_train/2-w": -0.8434491157531738, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.412279009819031, "rewards_train/margins_1": 4.049276113510132, "rewards_train/margins_2": 2.752938747406006, "step": 326 }, { "epoch": 0.98, "logps_train/policy_1_2": -200.1207275390625, "logps_train/policy_1_l": -167.8133544921875, "logps_train/policy_1_w": -117.02848815917969, "logps_train/policy_2_2": -129.28598022460938, "logps_train/policy_2_w": -190.39474487304688, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -2.1698851585388184, "rewards_train/1-l": -1.5683586597442627, "rewards_train/1-w": 2.4034008979797363, "rewards_train/2-2": 2.356557846069336, "rewards_train/2-w": -1.5121312141418457, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.971759557723999, "rewards_train/margins_1": 4.573286056518555, "rewards_train/margins_2": 3.8686890602111816, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -119.63238525390625, "logps_train/policy_1_l": -109.91273498535156, "logps_train/policy_1_w": -112.46156311035156, "logps_train/policy_2_2": -72.61579895019531, "logps_train/policy_2_w": -161.8822784423828, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -0.6640198230743408, "rewards_train/1-l": -1.187025785446167, "rewards_train/1-w": 2.582750082015991, "rewards_train/2-2": 1.8563892841339111, "rewards_train/2-w": -0.5772903561592102, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.769775867462158, "rewards_train/margins_1": 3.246769905090332, "rewards_train/margins_2": 2.4336796402931213, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -202.80142211914062, "logps_train/policy_1_l": -146.452880859375, "logps_train/policy_1_w": -126.84693145751953, "logps_train/policy_2_2": -137.15481567382812, "logps_train/policy_2_w": -196.8186798095703, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -0.9651035666465759, "rewards_train/1-l": -1.328613042831421, "rewards_train/1-w": 2.501171350479126, "rewards_train/2-2": 3.0929181575775146, "rewards_train/2-w": -1.728377103805542, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.829784393310547, "rewards_train/margins_1": 3.466274917125702, "rewards_train/margins_2": 4.821295261383057, "step": 327 }, { "epoch": 0.98, "logps_train/policy_1_2": -174.3221893310547, "logps_train/policy_1_l": -179.30015563964844, "logps_train/policy_1_w": -121.57481384277344, "logps_train/policy_2_2": -129.20814514160156, "logps_train/policy_2_w": -178.31163024902344, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -0.9040942192077637, "rewards_train/1-l": -2.0606799125671387, "rewards_train/1-w": 1.9725970029830933, "rewards_train/2-2": 1.9627788066864014, "rewards_train/2-w": -1.254990816116333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.033276915550232, "rewards_train/margins_1": 2.876691222190857, "rewards_train/margins_2": 3.2177696228027344, "step": 327 }, { "epoch": 0.98, "learning_rate": 2.7956829273034146e-06, "loss": 0.9533, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -107.752685546875, "logps_train/policy_1_l": -70.45170593261719, "logps_train/policy_1_w": -65.73338317871094, "logps_train/policy_2_2": -65.37921142578125, "logps_train/policy_2_w": -108.27778625488281, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -63.0, "logps_train/ref_1_w": -81.5, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": -0.7803464531898499, "rewards_train/1-l": -0.7438091039657593, "rewards_train/1-w": 1.5792927742004395, "rewards_train/2-2": 1.8075374364852905, "rewards_train/2-w": -1.0136678218841553, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.3231018781661987, "rewards_train/margins_1": 2.3596392273902893, "rewards_train/margins_2": 2.821205258369446, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -182.95542907714844, "logps_train/policy_1_l": -122.71408081054688, "logps_train/policy_1_w": -116.07405853271484, "logps_train/policy_2_2": -118.73204803466797, "logps_train/policy_2_w": -169.0362548828125, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.064292550086975, "rewards_train/1-l": -1.276486873626709, "rewards_train/1-w": 2.8089513778686523, "rewards_train/2-2": 2.87699031829834, "rewards_train/2-w": -0.3665158152580261, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.085438251495361, "rewards_train/margins_1": 3.8732439279556274, "rewards_train/margins_2": 3.243506133556366, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -193.402099609375, "logps_train/policy_1_l": -192.34515380859375, "logps_train/policy_1_w": -151.58856201171875, "logps_train/policy_2_2": -137.75216674804688, "logps_train/policy_2_w": -197.19461059570312, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.0741934776306152, "rewards_train/1-l": -2.009124755859375, "rewards_train/1-w": 3.1495437622070312, "rewards_train/2-2": 2.599783420562744, "rewards_train/2-w": 0.4727265536785126, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.158668518066406, "rewards_train/margins_1": 4.2237372398376465, "rewards_train/margins_2": 2.1270568668842316, "step": 328 }, { "epoch": 0.98, "logps_train/policy_1_2": -193.810546875, "logps_train/policy_1_l": -178.68283081054688, "logps_train/policy_1_w": -154.5584259033203, "logps_train/policy_2_2": -129.9463653564453, "logps_train/policy_2_w": -223.9215087890625, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -1.2966804504394531, "rewards_train/1-l": -1.6169161796569824, "rewards_train/1-w": 2.5949392318725586, "rewards_train/2-2": 2.690519332885742, "rewards_train/2-w": -1.7218379974365234, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.211855411529541, "rewards_train/margins_1": 3.8916196823120117, "rewards_train/margins_2": 4.412357330322266, "step": 328 }, { "epoch": 0.99, "logps_train/policy_1_2": -158.37295532226562, "logps_train/policy_1_l": -166.70797729492188, "logps_train/policy_1_w": -121.9950942993164, "logps_train/policy_2_2": -102.98719024658203, "logps_train/policy_2_w": -184.54183959960938, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.1661045551300049, "rewards_train/1-l": -2.713106393814087, "rewards_train/1-w": 2.511916160583496, "rewards_train/2-2": 2.147178888320923, "rewards_train/2-w": -1.1567223072052002, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.225022554397583, "rewards_train/margins_1": 3.678020715713501, "rewards_train/margins_2": 3.303901195526123, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -180.46856689453125, "logps_train/policy_1_l": -162.32382202148438, "logps_train/policy_1_w": -109.10883331298828, "logps_train/policy_2_2": -130.474365234375, "logps_train/policy_2_w": -162.67593383789062, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.7278621196746826, "rewards_train/1-l": -1.4280848503112793, "rewards_train/1-w": 2.0883352756500244, "rewards_train/2-2": 2.4219725131988525, "rewards_train/2-w": -1.0410308837890625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5164201259613037, "rewards_train/margins_1": 2.816197395324707, "rewards_train/margins_2": 3.463003396987915, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -157.24658203125, "logps_train/policy_1_l": -188.53131103515625, "logps_train/policy_1_w": -142.61700439453125, "logps_train/policy_2_2": -108.51368713378906, "logps_train/policy_2_w": -205.34185791015625, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -0.6465322971343994, "rewards_train/1-l": -1.6127022504806519, "rewards_train/1-w": 2.80118989944458, "rewards_train/2-2": 2.3212876319885254, "rewards_train/2-w": -0.8287155032157898, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.413892149925232, "rewards_train/margins_1": 3.4477221965789795, "rewards_train/margins_2": 3.150003135204315, "step": 329 }, { "epoch": 0.99, "logps_train/policy_1_2": -181.1805419921875, "logps_train/policy_1_l": -151.65528869628906, "logps_train/policy_1_w": -115.78460693359375, "logps_train/policy_2_2": -114.94964599609375, "logps_train/policy_2_w": -188.75030517578125, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.5930542945861816, "rewards_train/1-l": -1.0487191677093506, "rewards_train/1-w": 2.9231019020080566, "rewards_train/2-2": 2.442535161972046, "rewards_train/2-w": -1.2445619106292725, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9718210697174072, "rewards_train/margins_1": 4.516156196594238, "rewards_train/margins_2": 3.6870970726013184, "step": 329 }, { "epoch": 0.99, "learning_rate": 2.771144194993564e-06, "loss": 0.9471, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -193.96095275878906, "logps_train/policy_1_l": -230.01065063476562, "logps_train/policy_1_w": -146.6170654296875, "logps_train/policy_2_2": -136.61404418945312, "logps_train/policy_2_w": -208.46009826660156, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": -0.8984382748603821, "rewards_train/1-l": -1.8993072509765625, "rewards_train/1-w": 2.8578250408172607, "rewards_train/2-2": 2.7100791931152344, "rewards_train/2-w": -0.8112446665763855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.757132291793823, "rewards_train/margins_1": 3.756263315677643, "rewards_train/margins_2": 3.52132385969162, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -182.08157348632812, "logps_train/policy_1_l": -108.91893005371094, "logps_train/policy_1_w": -97.59886169433594, "logps_train/policy_2_2": -122.04883575439453, "logps_train/policy_2_w": -143.4371795654297, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.7280784845352173, "rewards_train/1-l": -1.092088222503662, "rewards_train/1-w": 2.4117937088012695, "rewards_train/2-2": 3.0994138717651367, "rewards_train/2-w": -0.11110040545463562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.5038819313049316, "rewards_train/margins_1": 3.139872193336487, "rewards_train/margins_2": 3.2105142772197723, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -129.161865234375, "logps_train/policy_1_l": -159.37362670898438, "logps_train/policy_1_w": -92.63800048828125, "logps_train/policy_2_2": -90.27204132080078, "logps_train/policy_2_w": -140.16964721679688, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -0.3926510512828827, "rewards_train/1-l": -1.4108011722564697, "rewards_train/1-w": 2.2860052585601807, "rewards_train/2-2": 1.8276787996292114, "rewards_train/2-w": -0.3052442967891693, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.6968064308166504, "rewards_train/margins_1": 2.6786563098430634, "rewards_train/margins_2": 2.1329230964183807, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -182.30496215820312, "logps_train/policy_1_l": -155.9581298828125, "logps_train/policy_1_w": -101.64227294921875, "logps_train/policy_2_2": -107.65780639648438, "logps_train/policy_2_w": -160.82313537597656, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -2.611745834350586, "rewards_train/1-l": -1.4843858480453491, "rewards_train/1-w": 1.8756170272827148, "rewards_train/2-2": 2.1149814128875732, "rewards_train/2-w": -1.5610237121582031, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.360002875328064, "rewards_train/margins_1": 4.487362861633301, "rewards_train/margins_2": 3.6760051250457764, "step": 330 }, { "epoch": 0.99, "logps_train/policy_1_2": -195.12210083007812, "logps_train/policy_1_l": -206.03753662109375, "logps_train/policy_1_w": -149.989990234375, "logps_train/policy_2_2": -138.68849182128906, "logps_train/policy_2_w": -210.2823486328125, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -0.1381869912147522, "rewards_train/1-l": -1.9537532329559326, "rewards_train/1-w": 2.788891315460205, "rewards_train/2-2": 3.2416977882385254, "rewards_train/2-w": -0.6590937376022339, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.742644548416138, "rewards_train/margins_1": 2.9270783066749573, "rewards_train/margins_2": 3.9007915258407593, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -221.72634887695312, "logps_train/policy_1_l": -182.82281494140625, "logps_train/policy_1_w": -164.79739379882812, "logps_train/policy_2_2": -154.66827392578125, "logps_train/policy_2_w": -234.2527313232422, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -1.0534932613372803, "rewards_train/1-l": -1.3666578531265259, "rewards_train/1-w": 2.8796353340148926, "rewards_train/2-2": 3.1003613471984863, "rewards_train/2-w": -1.2362117767333984, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.2462931871414185, "rewards_train/margins_1": 3.933128595352173, "rewards_train/margins_2": 4.336573123931885, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -182.65225219726562, "logps_train/policy_1_l": -187.18643188476562, "logps_train/policy_1_w": -128.67373657226562, "logps_train/policy_2_2": -119.69447326660156, "logps_train/policy_2_w": -203.25338745117188, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.6046791076660156, "rewards_train/1-l": -2.2073147296905518, "rewards_train/1-w": 2.8390731811523438, "rewards_train/2-2": 2.176060676574707, "rewards_train/2-w": -2.1218225955963135, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.0463879108428955, "rewards_train/margins_1": 4.443752288818359, "rewards_train/margins_2": 4.2978832721710205, "step": 331 }, { "epoch": 0.99, "logps_train/policy_1_2": -197.13998413085938, "logps_train/policy_1_l": -189.28744506835938, "logps_train/policy_1_w": -151.73403930664062, "logps_train/policy_2_2": -137.64569091796875, "logps_train/policy_2_w": -217.47610473632812, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -0.8128255605697632, "rewards_train/1-l": -1.7200541496276855, "rewards_train/1-w": 2.912996292114258, "rewards_train/2-2": 2.655792713165283, "rewards_train/2-w": -1.1952165365219116, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.633050441741943, "rewards_train/margins_1": 3.725821852684021, "rewards_train/margins_2": 3.851009249687195, "step": 331 }, { "epoch": 0.99, "learning_rate": 2.7465789994882796e-06, "loss": 0.7835, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -219.08438110351562, "logps_train/policy_1_l": -90.96520233154297, "logps_train/policy_1_w": -87.12162780761719, "logps_train/policy_2_2": -151.70883178710938, "logps_train/policy_2_w": -133.4418487548828, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -1.4799236059188843, "rewards_train/1-l": -0.7778680324554443, "rewards_train/1-w": 1.8636677265167236, "rewards_train/2-2": 3.1795082092285156, "rewards_train/2-w": -0.788521409034729, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 2.641535758972168, "rewards_train/margins_1": 3.343591332435608, "rewards_train/margins_2": 3.9680296182632446, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -152.333984375, "logps_train/policy_1_l": -145.6633758544922, "logps_train/policy_1_w": -121.1051254272461, "logps_train/policy_2_2": -105.1524658203125, "logps_train/policy_2_w": -187.21725463867188, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.8835944533348083, "rewards_train/1-l": -1.1466113328933716, "rewards_train/1-w": 2.4031591415405273, "rewards_train/2-2": 2.1342644691467285, "rewards_train/2-w": -1.5219204425811768, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.549770474433899, "rewards_train/margins_1": 3.2867535948753357, "rewards_train/margins_2": 3.6561849117279053, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -200.3808135986328, "logps_train/policy_1_l": -190.60894775390625, "logps_train/policy_1_w": -140.3024444580078, "logps_train/policy_2_2": -136.55104064941406, "logps_train/policy_2_w": -209.578369140625, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.40058171749115, "rewards_train/1-l": -1.4528861045837402, "rewards_train/1-w": 2.6967086791992188, "rewards_train/2-2": 2.623997688293457, "rewards_train/2-w": -1.2433841228485107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.149594783782959, "rewards_train/margins_1": 4.097290396690369, "rewards_train/margins_2": 3.8673818111419678, "step": 332 }, { "epoch": 0.99, "logps_train/policy_1_2": -149.3565216064453, "logps_train/policy_1_l": -126.47721862792969, "logps_train/policy_1_w": -95.02146911621094, "logps_train/policy_2_2": -101.52024841308594, "logps_train/policy_2_w": -152.08938598632812, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.0622146129608154, "rewards_train/1-l": -1.2534831762313843, "rewards_train/1-w": 2.335352897644043, "rewards_train/2-2": 1.8987560272216797, "rewards_train/2-w": -1.2081576585769653, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.5888360738754272, "rewards_train/margins_1": 3.3975675106048584, "rewards_train/margins_2": 3.106913685798645, "step": 332 }, { "epoch": 1.0, "logps_train/policy_1_2": -147.2967529296875, "logps_train/policy_1_l": -133.289794921875, "logps_train/policy_1_w": -101.31787872314453, "logps_train/policy_2_2": -100.61373901367188, "logps_train/policy_2_w": -152.87750244140625, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.681238055229187, "rewards_train/1-l": -1.453686237335205, "rewards_train/1-w": 2.2463369369506836, "rewards_train/2-2": 1.857767105102539, "rewards_train/2-w": -0.8779842853546143, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7000231742858887, "rewards_train/margins_1": 2.9275749921798706, "rewards_train/margins_2": 2.7357513904571533, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -141.18016052246094, "logps_train/policy_1_l": -132.4139404296875, "logps_train/policy_1_w": -98.64543151855469, "logps_train/policy_2_2": -90.15245819091797, "logps_train/policy_2_w": -150.61328125, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -1.244773507118225, "rewards_train/1-l": -1.103797197341919, "rewards_train/1-w": 2.2075023651123047, "rewards_train/2-2": 1.7888556718826294, "rewards_train/2-w": -0.9310065507888794, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.3112995624542236, "rewards_train/margins_1": 3.45227587223053, "rewards_train/margins_2": 2.719862222671509, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -193.07601928710938, "logps_train/policy_1_l": -221.20492553710938, "logps_train/policy_1_w": -149.30523681640625, "logps_train/policy_2_2": -123.52862548828125, "logps_train/policy_2_w": -229.52883911132812, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.7315762042999268, "rewards_train/1-l": -2.6031086444854736, "rewards_train/1-w": 3.387444496154785, "rewards_train/2-2": 2.613543748855591, "rewards_train/2-w": -1.5958518981933594, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.990553140640259, "rewards_train/margins_1": 5.119020700454712, "rewards_train/margins_2": 4.20939564704895, "step": 333 }, { "epoch": 1.0, "logps_train/policy_1_2": -231.21588134765625, "logps_train/policy_1_l": -220.47702026367188, "logps_train/policy_1_w": -159.10427856445312, "logps_train/policy_2_2": -160.1104736328125, "logps_train/policy_2_w": -218.88082885742188, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -1.0309618711471558, "rewards_train/1-l": -2.820554256439209, "rewards_train/1-w": 2.976094961166382, "rewards_train/2-2": 3.3670783042907715, "rewards_train/2-w": -0.7458953261375427, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.796649217605591, "rewards_train/margins_1": 4.007056832313538, "rewards_train/margins_2": 4.112973630428314, "step": 333 }, { "epoch": 1.0, "learning_rate": 2.721989738307337e-06, "loss": 0.8071, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -231.8544921875, "logps_train/policy_1_l": -141.65170288085938, "logps_train/policy_1_w": -114.33455657958984, "logps_train/policy_2_2": -150.19390869140625, "logps_train/policy_2_w": -182.9591064453125, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -2.456153392791748, "rewards_train/1-l": -1.7843106985092163, "rewards_train/1-w": 2.8786535263061523, "rewards_train/2-2": 2.9040470123291016, "rewards_train/2-w": -0.8498152494430542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.662964224815369, "rewards_train/margins_1": 5.3348069190979, "rewards_train/margins_2": 3.7538622617721558, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -147.37063598632812, "logps_train/policy_1_l": -208.08218383789062, "logps_train/policy_1_w": -139.6099395751953, "logps_train/policy_2_2": -104.04470825195312, "logps_train/policy_2_w": -194.48910522460938, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.6495649218559265, "rewards_train/1-l": -2.355288028717041, "rewards_train/1-w": 2.661759853363037, "rewards_train/2-2": 1.9073456525802612, "rewards_train/2-w": -0.8410987854003906, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.017047882080078, "rewards_train/margins_1": 3.3113247752189636, "rewards_train/margins_2": 2.748444437980652, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -116.54049682617188, "logps_train/policy_1_l": -101.14038848876953, "logps_train/policy_1_w": -67.20182800292969, "logps_train/policy_2_2": -66.23230743408203, "logps_train/policy_2_w": -127.19055938720703, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": -1.306394100189209, "rewards_train/1-l": -1.5402108430862427, "rewards_train/1-w": 1.9005208015441895, "rewards_train/2-2": 1.805675745010376, "rewards_train/2-w": -1.308508276939392, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.440731644630432, "rewards_train/margins_1": 3.2069149017333984, "rewards_train/margins_2": 3.114184021949768, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -150.19032287597656, "logps_train/policy_1_l": -174.411376953125, "logps_train/policy_1_w": -135.12493896484375, "logps_train/policy_2_2": -100.7025375366211, "logps_train/policy_2_w": -190.00942993164062, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -0.6868062019348145, "rewards_train/1-l": -2.0843987464904785, "rewards_train/1-w": 2.5476624965667725, "rewards_train/2-2": 2.3803319931030273, "rewards_train/2-w": -0.8517231941223145, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.632061243057251, "rewards_train/margins_1": 3.234468698501587, "rewards_train/margins_2": 3.232055187225342, "step": 334 }, { "epoch": 1.0, "logps_train/policy_1_2": -188.8440399169922, "logps_train/policy_1_l": -212.12460327148438, "logps_train/policy_1_w": -153.679931640625, "logps_train/policy_2_2": -137.32220458984375, "logps_train/policy_2_w": -220.65509033203125, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -0.7383096218109131, "rewards_train/1-l": -2.2943942546844482, "rewards_train/1-w": 3.4815192222595215, "rewards_train/2-2": 2.5412182807922363, "rewards_train/2-w": -0.7164835929870605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.77591347694397, "rewards_train/margins_1": 4.219828844070435, "rewards_train/margins_2": 3.257701873779297, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -142.44091796875, "logps_train/policy_1_l": -129.36380004882812, "logps_train/policy_1_w": -111.26951599121094, "logps_train/policy_2_2": -91.00570678710938, "logps_train/policy_2_w": -179.5257110595703, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.181787133216858, "rewards_train/1-l": -1.5146520137786865, "rewards_train/1-w": 2.547657012939453, "rewards_train/2-2": 2.293667793273926, "rewards_train/2-w": -1.7904613018035889, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.06230902671814, "rewards_train/margins_1": 3.729444146156311, "rewards_train/margins_2": 4.084129095077515, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -163.07981872558594, "logps_train/policy_1_l": -153.09866333007812, "logps_train/policy_1_w": -90.40663146972656, "logps_train/policy_2_2": -95.6325912475586, "logps_train/policy_2_w": -162.86129760742188, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.3025141954421997, "rewards_train/1-l": -2.0465846061706543, "rewards_train/1-w": 2.696836471557617, "rewards_train/2-2": 2.757443904876709, "rewards_train/2-w": -1.4923794269561768, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7434210777282715, "rewards_train/margins_1": 3.999350666999817, "rewards_train/margins_2": 4.249823331832886, "step": 335 }, { "epoch": 1.0, "logps_train/policy_1_2": -178.01629638671875, "logps_train/policy_1_l": -172.52540588378906, "logps_train/policy_1_w": -114.77862548828125, "logps_train/policy_2_2": -133.65769958496094, "logps_train/policy_2_w": -161.542236328125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.24772393703460693, "rewards_train/1-l": -1.2463886737823486, "rewards_train/1-w": 2.5477240085601807, "rewards_train/2-2": 2.645362377166748, "rewards_train/2-w": -0.4432859420776367, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7941126823425293, "rewards_train/margins_1": 2.7954479455947876, "rewards_train/margins_2": 3.0886483192443848, "step": 335 }, { "epoch": 1.01, "learning_rate": 2.69737881131928e-06, "loss": 0.7883, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -228.43609619140625, "logps_train/policy_1_l": -151.40882873535156, "logps_train/policy_1_w": -118.34892272949219, "logps_train/policy_2_2": -159.60028076171875, "logps_train/policy_2_w": -162.19747924804688, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.0476127862930298, "rewards_train/1-l": -1.795960545539856, "rewards_train/1-w": 2.42301869392395, "rewards_train/2-2": 3.3216118812561035, "rewards_train/2-w": -0.3467002809047699, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.218979239463806, "rewards_train/margins_1": 3.47063148021698, "rewards_train/margins_2": 3.6683121621608734, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -199.43475341796875, "logps_train/policy_1_l": -230.37527465820312, "logps_train/policy_1_w": -132.05538940429688, "logps_train/policy_2_2": -130.9821014404297, "logps_train/policy_2_w": -196.44566345214844, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.3676955699920654, "rewards_train/1-l": -2.634989023208618, "rewards_train/1-w": 2.6175076961517334, "rewards_train/2-2": 2.927180528640747, "rewards_train/2-w": -1.0723011493682861, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.252496719360352, "rewards_train/margins_1": 3.985203266143799, "rewards_train/margins_2": 3.999481678009033, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -181.56375122070312, "logps_train/policy_1_l": -146.52452087402344, "logps_train/policy_1_w": -98.02941131591797, "logps_train/policy_2_2": -122.49649810791016, "logps_train/policy_2_w": -154.80401611328125, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.5450482368469238, "rewards_train/1-l": -2.0378036499023438, "rewards_train/1-w": 2.6334846019744873, "rewards_train/2-2": 2.3612875938415527, "rewards_train/2-w": -0.7225894927978516, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.671288251876831, "rewards_train/margins_1": 4.178532838821411, "rewards_train/margins_2": 3.0838770866394043, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -198.75765991210938, "logps_train/policy_1_l": -220.1970977783203, "logps_train/policy_1_w": -150.27142333984375, "logps_train/policy_2_2": -136.56912231445312, "logps_train/policy_2_w": -224.80850219726562, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": -0.7437354326248169, "rewards_train/1-l": -2.0970542430877686, "rewards_train/1-w": 3.6328179836273193, "rewards_train/2-2": 2.892305850982666, "rewards_train/2-w": -0.9474509954452515, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.729872226715088, "rewards_train/margins_1": 4.376553416252136, "rewards_train/margins_2": 3.8397568464279175, "step": 336 }, { "epoch": 1.01, "logps_train/policy_1_2": -237.31912231445312, "logps_train/policy_1_l": -175.9694366455078, "logps_train/policy_1_w": -110.95379638671875, "logps_train/policy_2_2": -165.0558319091797, "logps_train/policy_2_w": -159.73382568359375, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -1.5951943397521973, "rewards_train/1-l": -2.2885451316833496, "rewards_train/1-w": 2.5430970191955566, "rewards_train/2-2": 3.081526756286621, "rewards_train/2-w": -0.5659595727920532, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.831642150878906, "rewards_train/margins_1": 4.138291358947754, "rewards_train/margins_2": 3.6474863290786743, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -238.30712890625, "logps_train/policy_1_l": -231.65737915039062, "logps_train/policy_1_w": -165.179931640625, "logps_train/policy_2_2": -173.7275390625, "logps_train/policy_2_w": -259.13677978515625, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": -0.9170423746109009, "rewards_train/1-l": -2.554800033569336, "rewards_train/1-w": 4.3775153160095215, "rewards_train/2-2": 3.4264638423919678, "rewards_train/2-w": -1.422466516494751, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.932315349578857, "rewards_train/margins_1": 5.294557690620422, "rewards_train/margins_2": 4.848930358886719, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -191.05145263671875, "logps_train/policy_1_l": -140.00604248046875, "logps_train/policy_1_w": -105.60702514648438, "logps_train/policy_2_2": -112.64448547363281, "logps_train/policy_2_w": -174.00003051757812, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -2.3121750354766846, "rewards_train/1-l": -1.7459160089492798, "rewards_train/1-w": 2.604959487915039, "rewards_train/2-2": 3.040238857269287, "rewards_train/2-w": -1.7199240922927856, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.350875496864319, "rewards_train/margins_1": 4.917134523391724, "rewards_train/margins_2": 4.760162949562073, "step": 337 }, { "epoch": 1.01, "logps_train/policy_1_2": -231.1717529296875, "logps_train/policy_1_l": -140.30361938476562, "logps_train/policy_1_w": -116.1676025390625, "logps_train/policy_2_2": -139.0464324951172, "logps_train/policy_2_w": -199.27789306640625, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -2.328112840652466, "rewards_train/1-l": -1.3686437606811523, "rewards_train/1-w": 3.1337764263153076, "rewards_train/2-2": 3.451606512069702, "rewards_train/2-w": -2.1011276245117188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.50242018699646, "rewards_train/margins_1": 5.461889266967773, "rewards_train/margins_2": 5.552734136581421, "step": 337 }, { "epoch": 1.01, "learning_rate": 2.672748620507195e-06, "loss": 0.6219, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -218.0703125, "logps_train/policy_1_l": -178.77227783203125, "logps_train/policy_1_w": -118.54611206054688, "logps_train/policy_2_2": -129.43112182617188, "logps_train/policy_2_w": -192.0299072265625, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -2.265625476837158, "rewards_train/1-l": -2.1305480003356934, "rewards_train/1-w": 2.7157018184661865, "rewards_train/2-2": 3.4463412761688232, "rewards_train/2-w": -1.7430291175842285, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.84624981880188, "rewards_train/margins_1": 4.981327295303345, "rewards_train/margins_2": 5.189370393753052, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -223.1552734375, "logps_train/policy_1_l": -154.3134002685547, "logps_train/policy_1_w": -124.47034454345703, "logps_train/policy_2_2": -148.84744262695312, "logps_train/policy_2_w": -191.9983367919922, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.7733402252197266, "rewards_train/1-l": -1.8300212621688843, "rewards_train/1-w": 2.964293956756592, "rewards_train/2-2": 3.0007057189941406, "rewards_train/2-w": -1.5324504375457764, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.794315218925476, "rewards_train/margins_1": 4.737634181976318, "rewards_train/margins_2": 4.533156156539917, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -203.02230834960938, "logps_train/policy_1_l": -198.05758666992188, "logps_train/policy_1_w": -146.34979248046875, "logps_train/policy_2_2": -152.68359375, "logps_train/policy_2_w": -212.99920654296875, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -0.647544264793396, "rewards_train/1-l": -2.002046585083008, "rewards_train/1-w": 3.4829888343811035, "rewards_train/2-2": 2.3871095180511475, "rewards_train/2-w": -1.119452714920044, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.485035419464111, "rewards_train/margins_1": 4.1305330991744995, "rewards_train/margins_2": 3.5065622329711914, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -314.0372314453125, "logps_train/policy_1_l": -241.02760314941406, "logps_train/policy_1_w": -173.16751098632812, "logps_train/policy_2_2": -209.41867065429688, "logps_train/policy_2_w": -268.78973388671875, "logps_train/ref_1_2": -294.0, "logps_train/ref_1_l": -217.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -258.0, "logps_train/ref_2_w": -254.0, "rewards_train/1-2": -1.9974747896194458, "rewards_train/1-l": -2.414088249206543, "rewards_train/1-w": 4.464498519897461, "rewards_train/2-2": 4.812820911407471, "rewards_train/2-w": -1.393034815788269, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.878586769104004, "rewards_train/margins_1": 6.461973309516907, "rewards_train/margins_2": 6.20585572719574, "step": 338 }, { "epoch": 1.01, "logps_train/policy_1_2": -131.825927734375, "logps_train/policy_1_l": -89.40113830566406, "logps_train/policy_1_w": -88.49556732177734, "logps_train/policy_2_2": -90.75677490234375, "logps_train/policy_2_w": -126.66586303710938, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": -0.38493549823760986, "rewards_train/1-l": -1.1732680797576904, "rewards_train/1-w": 2.079740047454834, "rewards_train/2-2": 2.221588134765625, "rewards_train/2-w": -0.6204925775527954, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2530081272125244, "rewards_train/margins_1": 2.464675545692444, "rewards_train/margins_2": 2.8420807123184204, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -133.9255828857422, "logps_train/policy_1_l": -160.8685760498047, "logps_train/policy_1_w": -59.61804962158203, "logps_train/policy_2_2": -85.99623107910156, "logps_train/policy_2_w": -96.44927978515625, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -76.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -90.0, "rewards_train/1-2": -0.8958780765533447, "rewards_train/1-l": -1.7051687240600586, "rewards_train/1-w": 1.7146600484848022, "rewards_train/2-2": 2.0501813888549805, "rewards_train/2-w": -0.6462953686714172, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.419828772544861, "rewards_train/margins_1": 2.610538125038147, "rewards_train/margins_2": 2.6964767575263977, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -211.50155639648438, "logps_train/policy_1_l": -158.82089233398438, "logps_train/policy_1_w": -131.79713439941406, "logps_train/policy_2_2": -144.5208740234375, "logps_train/policy_2_w": -202.61819458007812, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -1.684213399887085, "rewards_train/1-l": -2.0142428874969482, "rewards_train/1-w": 3.1545889377593994, "rewards_train/2-2": 2.756214141845703, "rewards_train/2-w": -1.3742218017578125, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.168831825256348, "rewards_train/margins_1": 4.838802337646484, "rewards_train/margins_2": 4.130435943603516, "step": 339 }, { "epoch": 1.01, "logps_train/policy_1_2": -170.3170166015625, "logps_train/policy_1_l": -153.82957458496094, "logps_train/policy_1_w": -109.50621032714844, "logps_train/policy_2_2": -111.94278717041016, "logps_train/policy_2_w": -174.00253295898438, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.1040154695510864, "rewards_train/1-l": -2.2435052394866943, "rewards_train/1-w": 2.9024548530578613, "rewards_train/2-2": 2.7146079540252686, "rewards_train/2-w": -1.186582088470459, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.145960092544556, "rewards_train/margins_1": 4.006470322608948, "rewards_train/margins_2": 3.9011900424957275, "step": 339 }, { "epoch": 1.02, "learning_rate": 2.648101569734286e-06, "loss": 0.791, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -185.90707397460938, "logps_train/policy_1_l": -151.5470428466797, "logps_train/policy_1_w": -112.54557800292969, "logps_train/policy_2_2": -132.25221252441406, "logps_train/policy_2_w": -186.3529052734375, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.8090669512748718, "rewards_train/1-l": -1.9192800521850586, "rewards_train/1-w": 3.283723831176758, "rewards_train/2-2": 2.4490458965301514, "rewards_train/2-w": -1.658338189125061, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.203003883361816, "rewards_train/margins_1": 4.09279078245163, "rewards_train/margins_2": 4.107384085655212, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -127.67630004882812, "logps_train/policy_1_l": -93.32197570800781, "logps_train/policy_1_w": -75.21357727050781, "logps_train/policy_2_2": -69.24952697753906, "logps_train/policy_2_w": -115.72400665283203, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": -1.2411655187606812, "rewards_train/1-l": -1.3284865617752075, "rewards_train/1-w": 1.7464159727096558, "rewards_train/2-2": 2.173142433166504, "rewards_train/2-w": -0.9290415644645691, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.0749025344848633, "rewards_train/margins_1": 2.987581491470337, "rewards_train/margins_2": 3.102183997631073, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -159.24111938476562, "logps_train/policy_1_l": -188.50674438476562, "logps_train/policy_1_w": -116.54110717773438, "logps_train/policy_2_2": -103.0776138305664, "logps_train/policy_2_w": -184.15863037109375, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.41434645652771, "rewards_train/1-l": -2.6396138668060303, "rewards_train/1-w": 2.9935460090637207, "rewards_train/2-2": 2.0947775840759277, "rewards_train/2-w": -1.1894962787628174, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.633159875869751, "rewards_train/margins_1": 4.407892465591431, "rewards_train/margins_2": 3.284273862838745, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -264.7415771484375, "logps_train/policy_1_l": -180.34710693359375, "logps_train/policy_1_w": -129.53054809570312, "logps_train/policy_2_2": -181.09933471679688, "logps_train/policy_2_w": -183.07713317871094, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.8429090976715088, "rewards_train/1-l": -2.6774840354919434, "rewards_train/1-w": 2.952267646789551, "rewards_train/2-2": 3.613893985748291, "rewards_train/2-w": -0.539549708366394, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.629751682281494, "rewards_train/margins_1": 4.79517674446106, "rewards_train/margins_2": 4.153443694114685, "step": 340 }, { "epoch": 1.02, "logps_train/policy_1_2": -169.903076171875, "logps_train/policy_1_l": -212.24159240722656, "logps_train/policy_1_w": -184.7611083984375, "logps_train/policy_2_2": -108.0643310546875, "logps_train/policy_2_w": -276.75323486328125, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -243.0, "rewards_train/1-2": -1.4646248817443848, "rewards_train/1-l": -2.8347067832946777, "rewards_train/1-w": 2.838340997695923, "rewards_train/2-2": 2.7200870513916016, "rewards_train/2-w": -3.337432622909546, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.673047780990601, "rewards_train/margins_1": 4.302965879440308, "rewards_train/margins_2": 6.0575196743011475, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -218.53494262695312, "logps_train/policy_1_l": -226.91506958007812, "logps_train/policy_1_w": -153.25511169433594, "logps_train/policy_2_2": -134.16189575195312, "logps_train/policy_2_w": -242.1112518310547, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": -1.8870890140533447, "rewards_train/1-l": -3.078421115875244, "rewards_train/1-w": 3.614333391189575, "rewards_train/2-2": 3.3533413410186768, "rewards_train/2-w": -1.9908123016357422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.692754507064819, "rewards_train/margins_1": 5.50142240524292, "rewards_train/margins_2": 5.344153642654419, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -230.00253295898438, "logps_train/policy_1_l": -175.79981994628906, "logps_train/policy_1_w": -151.9967498779297, "logps_train/policy_2_2": -146.93414306640625, "logps_train/policy_2_w": -233.17718505859375, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.676814317703247, "rewards_train/1-l": -2.2932634353637695, "rewards_train/1-w": 3.7534492015838623, "rewards_train/2-2": 3.6987719535827637, "rewards_train/2-w": -1.5110771656036377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.046712636947632, "rewards_train/margins_1": 5.430263519287109, "rewards_train/margins_2": 5.209849119186401, "step": 341 }, { "epoch": 1.02, "logps_train/policy_1_2": -245.17892456054688, "logps_train/policy_1_l": -203.99267578125, "logps_train/policy_1_w": -143.9803466796875, "logps_train/policy_2_2": -161.71551513671875, "logps_train/policy_2_w": -217.47637939453125, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.5975799560546875, "rewards_train/1-l": -2.4208483695983887, "rewards_train/1-w": 3.706066131591797, "rewards_train/2-2": 4.013605117797852, "rewards_train/2-w": -0.7577930092811584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.1269145011901855, "rewards_train/margins_1": 5.303646087646484, "rewards_train/margins_2": 4.77139812707901, "step": 341 }, { "epoch": 1.02, "learning_rate": 2.6234400645092576e-06, "loss": 0.7665, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -123.15888977050781, "logps_train/policy_1_l": -94.76302337646484, "logps_train/policy_1_w": -78.39337158203125, "logps_train/policy_2_2": -78.74910736083984, "logps_train/policy_2_w": -131.94766235351562, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -100.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": -0.7143264412879944, "rewards_train/1-l": -1.3825523853302002, "rewards_train/1-w": 2.2012882232666016, "rewards_train/2-2": 2.297745704650879, "rewards_train/2-w": -1.126015543937683, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.5838406085968018, "rewards_train/margins_1": 2.915614664554596, "rewards_train/margins_2": 3.423761248588562, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -203.98715209960938, "logps_train/policy_1_l": -164.66006469726562, "logps_train/policy_1_w": -141.53363037109375, "logps_train/policy_2_2": -140.376708984375, "logps_train/policy_2_w": -215.24656677246094, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -0.5752776861190796, "rewards_train/1-l": -1.586806297302246, "rewards_train/1-w": 3.7591378688812256, "rewards_train/2-2": 3.3754138946533203, "rewards_train/2-w": -1.1230952739715576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.345944166183472, "rewards_train/margins_1": 4.334415555000305, "rewards_train/margins_2": 4.498509168624878, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -166.5133056640625, "logps_train/policy_1_l": -127.87335205078125, "logps_train/policy_1_w": -115.35741424560547, "logps_train/policy_2_2": -104.30585479736328, "logps_train/policy_2_w": -186.251220703125, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -107.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.7915644645690918, "rewards_train/1-l": -2.0302064418792725, "rewards_train/1-w": 3.0772476196289062, "rewards_train/2-2": 2.315117835998535, "rewards_train/2-w": -1.5224860906600952, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.107454061508179, "rewards_train/margins_1": 4.868812084197998, "rewards_train/margins_2": 3.8376039266586304, "step": 342 }, { "epoch": 1.02, "logps_train/policy_1_2": -172.186767578125, "logps_train/policy_1_l": -192.89718627929688, "logps_train/policy_1_w": -104.79012298583984, "logps_train/policy_2_2": -104.21023559570312, "logps_train/policy_2_w": -161.84197998046875, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.9757074117660522, "rewards_train/1-l": -2.6144254207611084, "rewards_train/1-w": 2.83329176902771, "rewards_train/2-2": 3.2449915409088135, "rewards_train/2-w": -0.7576361298561096, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.447717189788818, "rewards_train/margins_1": 3.808999180793762, "rewards_train/margins_2": 4.002627670764923, "step": 342 }, { "epoch": 1.03, "logps_train/policy_1_2": -201.2355194091797, "logps_train/policy_1_l": -157.74343872070312, "logps_train/policy_1_w": -147.81187438964844, "logps_train/policy_2_2": -133.79931640625, "logps_train/policy_2_w": -212.33326721191406, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.4557782411575317, "rewards_train/1-l": -1.9157021045684814, "rewards_train/1-w": 2.6969380378723145, "rewards_train/2-2": 2.7271969318389893, "rewards_train/2-w": -1.5708280801773071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.612640142440796, "rewards_train/margins_1": 4.152716279029846, "rewards_train/margins_2": 4.298025012016296, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -298.19140625, "logps_train/policy_1_l": -201.99456787109375, "logps_train/policy_1_w": -150.16390991210938, "logps_train/policy_2_2": -203.2992706298828, "logps_train/policy_2_w": -231.27911376953125, "logps_train/ref_1_2": -272.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -243.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -2.5931622982025146, "rewards_train/1-l": -2.406879186630249, "rewards_train/1-w": 3.7282373905181885, "rewards_train/2-2": 3.9666550159454346, "rewards_train/2-w": -1.3548645973205566, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.1351165771484375, "rewards_train/margins_1": 6.321399688720703, "rewards_train/margins_2": 5.321519613265991, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -183.52224731445312, "logps_train/policy_1_l": -109.6515121459961, "logps_train/policy_1_w": -81.11168670654297, "logps_train/policy_2_2": -120.07355499267578, "logps_train/policy_2_w": -130.8738555908203, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": -1.1569119691848755, "rewards_train/1-l": -2.107680559158325, "rewards_train/1-w": 2.504066228866577, "rewards_train/2-2": 2.793426513671875, "rewards_train/2-w": -0.6397300958633423, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.611746788024902, "rewards_train/margins_1": 3.6609781980514526, "rewards_train/margins_2": 3.4331566095352173, "step": 343 }, { "epoch": 1.03, "logps_train/policy_1_2": -136.8263397216797, "logps_train/policy_1_l": -143.75601196289062, "logps_train/policy_1_w": -82.90267944335938, "logps_train/policy_2_2": -88.14435577392578, "logps_train/policy_2_w": -145.46946716308594, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": -0.3054863512516022, "rewards_train/1-l": -1.7276999950408936, "rewards_train/1-w": 2.298696517944336, "rewards_train/2-2": 2.492302894592285, "rewards_train/2-w": -1.7137439250946045, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.0263965129852295, "rewards_train/margins_1": 2.604182869195938, "rewards_train/margins_2": 4.20604681968689, "step": 343 }, { "epoch": 1.03, "learning_rate": 2.598766511751545e-06, "loss": 0.7075, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -223.4587860107422, "logps_train/policy_1_l": -181.80581665039062, "logps_train/policy_1_w": -152.57421875, "logps_train/policy_2_2": -140.48297119140625, "logps_train/policy_2_w": -238.89207458496094, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -1.6372849941253662, "rewards_train/1-l": -2.0231592655181885, "rewards_train/1-w": 4.10585880279541, "rewards_train/2-2": 3.6485798358917236, "rewards_train/2-w": -1.6220182180404663, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.129018068313599, "rewards_train/margins_1": 5.743143796920776, "rewards_train/margins_2": 5.27059805393219, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -99.93226623535156, "logps_train/policy_1_l": -103.75303649902344, "logps_train/policy_1_w": -70.11064147949219, "logps_train/policy_2_2": -58.284278869628906, "logps_train/policy_2_w": -134.21507263183594, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -71.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": -1.2746719121932983, "rewards_train/1-l": -1.8767683506011963, "rewards_train/1-w": 2.102998733520508, "rewards_train/2-2": 1.2608299255371094, "rewards_train/2-w": -1.6761951446533203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.979767084121704, "rewards_train/margins_1": 3.377670645713806, "rewards_train/margins_2": 2.9370250701904297, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -217.80335998535156, "logps_train/policy_1_l": -222.01641845703125, "logps_train/policy_1_w": -146.72337341308594, "logps_train/policy_2_2": -143.7100830078125, "logps_train/policy_2_w": -238.74143981933594, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -2.018617630004883, "rewards_train/1-l": -2.226055145263672, "rewards_train/1-w": 3.446803569793701, "rewards_train/2-2": 3.0311403274536133, "rewards_train/2-w": -2.7917215824127197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.672858715057373, "rewards_train/margins_1": 5.465421199798584, "rewards_train/margins_2": 5.822861909866333, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -156.42794799804688, "logps_train/policy_1_l": -151.24588012695312, "logps_train/policy_1_w": -120.854248046875, "logps_train/policy_2_2": -107.58903503417969, "logps_train/policy_2_w": -183.33380126953125, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.5494362115859985, "rewards_train/1-l": -1.674734354019165, "rewards_train/1-w": 3.138720989227295, "rewards_train/2-2": 2.7489089965820312, "rewards_train/2-w": -1.3950982093811035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.81345534324646, "rewards_train/margins_1": 3.6881572008132935, "rewards_train/margins_2": 4.144007205963135, "step": 344 }, { "epoch": 1.03, "logps_train/policy_1_2": -134.9404296875, "logps_train/policy_1_l": -141.09500122070312, "logps_train/policy_1_w": -166.8341522216797, "logps_train/policy_2_2": -83.8851318359375, "logps_train/policy_2_w": -245.57943725585938, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": -0.6565422415733337, "rewards_train/1-l": -1.6528596878051758, "rewards_train/1-w": 3.2840662002563477, "rewards_train/2-2": 2.5466432571411133, "rewards_train/2-w": -1.3776699304580688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.936925888061523, "rewards_train/margins_1": 3.9406084418296814, "rewards_train/margins_2": 3.924313187599182, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -232.12008666992188, "logps_train/policy_1_l": -198.72213745117188, "logps_train/policy_1_w": -122.0142822265625, "logps_train/policy_2_2": -169.94873046875, "logps_train/policy_2_w": -181.29493713378906, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.81083744764328, "rewards_train/1-l": -2.719381332397461, "rewards_train/1-w": 3.0097053050994873, "rewards_train/2-2": 3.1917481422424316, "rewards_train/2-w": -0.9773446321487427, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.729086637496948, "rewards_train/margins_1": 3.8205427527427673, "rewards_train/margins_2": 4.169092774391174, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -180.34982299804688, "logps_train/policy_1_l": -183.79832458496094, "logps_train/policy_1_w": -142.858154296875, "logps_train/policy_2_2": -125.38066101074219, "logps_train/policy_2_w": -207.82363891601562, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.8949422240257263, "rewards_train/1-l": -2.093503475189209, "rewards_train/1-w": 2.78664493560791, "rewards_train/2-2": 2.649433135986328, "rewards_train/2-w": -1.5667390823364258, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.880148410797119, "rewards_train/margins_1": 3.6815871596336365, "rewards_train/margins_2": 4.216172218322754, "step": 345 }, { "epoch": 1.03, "logps_train/policy_1_2": -93.94573974609375, "logps_train/policy_1_l": -129.74728393554688, "logps_train/policy_1_w": -71.84576416015625, "logps_train/policy_2_2": -62.17877197265625, "logps_train/policy_2_w": -116.59717559814453, "logps_train/ref_1_2": -89.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -77.5, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": -0.49489128589630127, "rewards_train/1-l": -2.098165988922119, "rewards_train/1-w": 1.9121028184890747, "rewards_train/2-2": 1.533941626548767, "rewards_train/2-w": -0.9995617866516113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.010268807411194, "rewards_train/margins_1": 2.406994104385376, "rewards_train/margins_2": 2.5335034132003784, "step": 345 }, { "epoch": 1.04, "learning_rate": 2.5740833195563996e-06, "loss": 0.7378, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -244.41021728515625, "logps_train/policy_1_l": -166.79319763183594, "logps_train/policy_1_w": -133.44345092773438, "logps_train/policy_2_2": -168.30770874023438, "logps_train/policy_2_w": -186.67547607421875, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -205.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -0.9095278978347778, "rewards_train/1-l": -2.203441858291626, "rewards_train/1-w": 3.0430569648742676, "rewards_train/2-2": 3.670889377593994, "rewards_train/2-w": -0.2784866392612457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.2464988231658936, "rewards_train/margins_1": 3.9525848627090454, "rewards_train/margins_2": 3.94937601685524, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -176.5247802734375, "logps_train/policy_1_l": -147.34747314453125, "logps_train/policy_1_w": -113.94770812988281, "logps_train/policy_2_2": -120.29205322265625, "logps_train/policy_2_w": -173.22567749023438, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -0.5384153127670288, "rewards_train/1-l": -2.0306458473205566, "rewards_train/1-w": 3.1411678791046143, "rewards_train/2-2": 2.9536075592041016, "rewards_train/2-w": -0.5967862010002136, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.171813726425171, "rewards_train/margins_1": 3.679583191871643, "rewards_train/margins_2": 3.550393760204315, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -201.22296142578125, "logps_train/policy_1_l": -147.08450317382812, "logps_train/policy_1_w": -97.78761291503906, "logps_train/policy_2_2": -132.30255126953125, "logps_train/policy_2_w": -154.94134521484375, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.4812803268432617, "rewards_train/1-l": -2.178762435913086, "rewards_train/1-w": 2.927708864212036, "rewards_train/2-2": 2.932636260986328, "rewards_train/2-w": -0.8769471645355225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.106471300125122, "rewards_train/margins_1": 4.408989191055298, "rewards_train/margins_2": 3.8095834255218506, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -205.8537139892578, "logps_train/policy_1_l": -201.723876953125, "logps_train/policy_1_w": -131.29144287109375, "logps_train/policy_2_2": -126.82119750976562, "logps_train/policy_2_w": -211.5518035888672, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.7720911502838135, "rewards_train/1-l": -2.717895984649658, "rewards_train/1-w": 3.397418975830078, "rewards_train/2-2": 3.062411308288574, "rewards_train/2-w": -1.3352584838867188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.115314960479736, "rewards_train/margins_1": 5.169510126113892, "rewards_train/margins_2": 4.397669792175293, "step": 346 }, { "epoch": 1.04, "logps_train/policy_1_2": -130.90203857421875, "logps_train/policy_1_l": -155.55068969726562, "logps_train/policy_1_w": -99.05595397949219, "logps_train/policy_2_2": -85.49540710449219, "logps_train/policy_2_w": -157.07762145996094, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.6237982511520386, "rewards_train/1-l": -1.8636881113052368, "rewards_train/1-w": 3.041865348815918, "rewards_train/2-2": 2.2637410163879395, "rewards_train/2-w": -0.9382317066192627, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.905553460121155, "rewards_train/margins_1": 3.6656635999679565, "rewards_train/margins_2": 3.201972723007202, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -147.24148559570312, "logps_train/policy_1_l": -130.95135498046875, "logps_train/policy_1_w": -122.29353332519531, "logps_train/policy_2_2": -99.70331573486328, "logps_train/policy_2_w": -195.53140258789062, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -0.41711699962615967, "rewards_train/1-l": -1.8201348781585693, "rewards_train/1-w": 3.2683024406433105, "rewards_train/2-2": 2.6361143589019775, "rewards_train/2-w": -1.810560941696167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.08843731880188, "rewards_train/margins_1": 3.68541944026947, "rewards_train/margins_2": 4.4466753005981445, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -174.71124267578125, "logps_train/policy_1_l": -169.3013153076172, "logps_train/policy_1_w": -72.7904052734375, "logps_train/policy_2_2": -105.33001708984375, "logps_train/policy_2_w": -115.64302062988281, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": -2.077179431915283, "rewards_train/1-l": -2.8526411056518555, "rewards_train/1-w": 2.33892822265625, "rewards_train/2-2": 2.7481021881103516, "rewards_train/2-w": -0.4022897779941559, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.1915693283081055, "rewards_train/margins_1": 4.416107654571533, "rewards_train/margins_2": 3.1503919661045074, "step": 347 }, { "epoch": 1.04, "logps_train/policy_1_2": -147.46328735351562, "logps_train/policy_1_l": -102.18863677978516, "logps_train/policy_1_w": -81.54690551757812, "logps_train/policy_2_2": -85.982177734375, "logps_train/policy_2_w": -146.49871826171875, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -1.3611730337142944, "rewards_train/1-l": -1.6501131057739258, "rewards_train/1-w": 2.3972623348236084, "rewards_train/2-2": 2.576782464981079, "rewards_train/2-w": -1.6768243312835693, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.047375440597534, "rewards_train/margins_1": 3.758435368537903, "rewards_train/margins_2": 4.253606796264648, "step": 347 }, { "epoch": 1.04, "learning_rate": 2.5493928969598664e-06, "loss": 0.7179, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -154.07855224609375, "logps_train/policy_1_l": -184.08787536621094, "logps_train/policy_1_w": -141.212646484375, "logps_train/policy_2_2": -112.7376708984375, "logps_train/policy_2_w": -199.9626007080078, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.27445629239082336, "rewards_train/1-l": -2.329636335372925, "rewards_train/1-w": 3.029906749725342, "rewards_train/2-2": 2.344788074493408, "rewards_train/2-w": -0.8802444934844971, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.359543085098267, "rewards_train/margins_1": 3.304363042116165, "rewards_train/margins_2": 3.2250325679779053, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -169.6119384765625, "logps_train/policy_1_l": -181.54917907714844, "logps_train/policy_1_w": -123.99742889404297, "logps_train/policy_2_2": -104.232421875, "logps_train/policy_2_w": -188.8564910888672, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.6994750499725342, "rewards_train/1-l": -2.487706184387207, "rewards_train/1-w": 2.6658830642700195, "rewards_train/2-2": 2.5972652435302734, "rewards_train/2-w": -1.4333055019378662, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.153589248657227, "rewards_train/margins_1": 4.365358114242554, "rewards_train/margins_2": 4.03057074546814, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -152.92630004882812, "logps_train/policy_1_l": -121.0561752319336, "logps_train/policy_1_w": -87.00016784667969, "logps_train/policy_2_2": -101.02180480957031, "logps_train/policy_2_w": -130.0633087158203, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": -1.089699387550354, "rewards_train/1-l": -2.0484890937805176, "rewards_train/1-w": 2.0105299949645996, "rewards_train/2-2": 2.388444185256958, "rewards_train/2-w": -0.9008609056472778, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.059019088745117, "rewards_train/margins_1": 3.1002293825149536, "rewards_train/margins_2": 3.289305090904236, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -166.82542419433594, "logps_train/policy_1_l": -95.72598266601562, "logps_train/policy_1_w": -80.62408447265625, "logps_train/policy_2_2": -107.17506408691406, "logps_train/policy_2_w": -135.07284545898438, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": -1.7622296810150146, "rewards_train/1-l": -1.4419829845428467, "rewards_train/1-w": 2.064544200897217, "rewards_train/2-2": 2.169602632522583, "rewards_train/2-w": -1.490682601928711, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.5065271854400635, "rewards_train/margins_1": 3.8267738819122314, "rewards_train/margins_2": 3.660285234451294, "step": 348 }, { "epoch": 1.04, "logps_train/policy_1_2": -141.4199981689453, "logps_train/policy_1_l": -127.84963989257812, "logps_train/policy_1_w": -110.5669174194336, "logps_train/policy_2_2": -85.63750457763672, "logps_train/policy_2_w": -170.442138671875, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.0720775127410889, "rewards_train/1-l": -1.6847056150436401, "rewards_train/1-w": 2.3456525802612305, "rewards_train/2-2": 2.082343578338623, "rewards_train/2-w": -1.7489018440246582, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.030358195304871, "rewards_train/margins_1": 3.4177300930023193, "rewards_train/margins_2": 3.8312454223632812, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -186.2027587890625, "logps_train/policy_1_l": -223.49185180664062, "logps_train/policy_1_w": -139.55734252929688, "logps_train/policy_2_2": -117.1751708984375, "logps_train/policy_2_w": -226.533203125, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.2237908840179443, "rewards_train/1-l": -3.189566135406494, "rewards_train/1-w": 3.800515651702881, "rewards_train/2-2": 3.241466522216797, "rewards_train/2-w": -2.0462899208068848, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.990081787109375, "rewards_train/margins_1": 5.024306535720825, "rewards_train/margins_2": 5.287756443023682, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -126.4499282836914, "logps_train/policy_1_l": -134.5446319580078, "logps_train/policy_1_w": -91.89453125, "logps_train/policy_2_2": -89.1401138305664, "logps_train/policy_2_w": -153.8511962890625, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.4963599145412445, "rewards_train/1-l": -2.0016307830810547, "rewards_train/1-w": 2.7428717613220215, "rewards_train/2-2": 1.9361841678619385, "rewards_train/2-w": -1.2966426610946655, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.744502544403076, "rewards_train/margins_1": 3.239231675863266, "rewards_train/margins_2": 3.232826828956604, "step": 349 }, { "epoch": 1.04, "logps_train/policy_1_2": -175.36026000976562, "logps_train/policy_1_l": -153.56045532226562, "logps_train/policy_1_w": -129.33425903320312, "logps_train/policy_2_2": -103.92079162597656, "logps_train/policy_2_w": -213.5927276611328, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -2.2606358528137207, "rewards_train/1-l": -2.4232325553894043, "rewards_train/1-w": 2.005636215209961, "rewards_train/2-2": 2.320225238800049, "rewards_train/2-w": -3.276069402694702, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.428868770599365, "rewards_train/margins_1": 4.266272068023682, "rewards_train/margins_2": 5.596294641494751, "step": 349 }, { "epoch": 1.05, "learning_rate": 2.5246976537036646e-06, "loss": 0.9331, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -171.21817016601562, "logps_train/policy_1_l": -140.1999053955078, "logps_train/policy_1_w": -125.13401794433594, "logps_train/policy_2_2": -101.93309783935547, "logps_train/policy_2_w": -206.23312377929688, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -2.194253921508789, "rewards_train/1-l": -1.6477978229522705, "rewards_train/1-w": 2.8660168647766113, "rewards_train/2-2": 2.19553279876709, "rewards_train/2-w": -2.74548077583313, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.513814687728882, "rewards_train/margins_1": 5.0602707862854, "rewards_train/margins_2": 4.94101357460022, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -163.91497802734375, "logps_train/policy_1_l": -177.43789672851562, "logps_train/policy_1_w": -143.43263244628906, "logps_train/policy_2_2": -107.79289245605469, "logps_train/policy_2_w": -221.5103759765625, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.18837308883667, "rewards_train/1-l": -2.6563873291015625, "rewards_train/1-w": 2.607713222503662, "rewards_train/2-2": 2.2509841918945312, "rewards_train/2-w": -2.2955687046051025, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.264100551605225, "rewards_train/margins_1": 3.796086311340332, "rewards_train/margins_2": 4.546552896499634, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -197.30368041992188, "logps_train/policy_1_l": -227.4715576171875, "logps_train/policy_1_w": -121.85704040527344, "logps_train/policy_2_2": -129.74769592285156, "logps_train/policy_2_w": -179.44171142578125, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.0178686380386353, "rewards_train/1-l": -2.49617862701416, "rewards_train/1-w": 3.3838272094726562, "rewards_train/2-2": 3.2713232040405273, "rewards_train/2-w": -0.3965156674385071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.880005836486816, "rewards_train/margins_1": 4.4016958475112915, "rewards_train/margins_2": 3.6678388714790344, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -161.82003784179688, "logps_train/policy_1_l": -141.86898803710938, "logps_train/policy_1_w": -118.94898986816406, "logps_train/policy_2_2": -102.11358642578125, "logps_train/policy_2_w": -191.70443725585938, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.356222152709961, "rewards_train/1-l": -1.4909025430679321, "rewards_train/1-w": 3.012913227081299, "rewards_train/2-2": 2.3771181106567383, "rewards_train/2-w": -1.6181005239486694, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.503815770149231, "rewards_train/margins_1": 4.36913537979126, "rewards_train/margins_2": 3.9952186346054077, "step": 350 }, { "epoch": 1.05, "logps_train/policy_1_2": -215.18052673339844, "logps_train/policy_1_l": -205.50503540039062, "logps_train/policy_1_w": -130.8352508544922, "logps_train/policy_2_2": -141.3993377685547, "logps_train/policy_2_w": -192.38067626953125, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.792271614074707, "rewards_train/1-l": -2.839662551879883, "rewards_train/1-w": 3.2518272399902344, "rewards_train/2-2": 2.9335036277770996, "rewards_train/2-w": -0.5993953943252563, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.091489791870117, "rewards_train/margins_1": 5.044098854064941, "rewards_train/margins_2": 3.532899022102356, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -194.97442626953125, "logps_train/policy_1_l": -218.96157836914062, "logps_train/policy_1_w": -102.39376068115234, "logps_train/policy_2_2": -136.06190490722656, "logps_train/policy_2_w": -144.83395385742188, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.2747864723205566, "rewards_train/1-l": -2.9138827323913574, "rewards_train/1-w": 2.701639175415039, "rewards_train/2-2": 2.4621691703796387, "rewards_train/2-w": 0.11152699589729309, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.6155219078063965, "rewards_train/margins_1": 3.9764256477355957, "rewards_train/margins_2": 2.3506421744823456, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -244.87185668945312, "logps_train/policy_1_l": -239.33189392089844, "logps_train/policy_1_w": -132.87254333496094, "logps_train/policy_2_2": -160.14666748046875, "logps_train/policy_2_w": -223.9024658203125, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -2.36609148979187, "rewards_train/1-l": -3.0485222339630127, "rewards_train/1-w": 3.768214702606201, "rewards_train/2-2": 2.976348876953125, "rewards_train/2-w": -1.5683724880218506, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.816736936569214, "rewards_train/margins_1": 6.134306192398071, "rewards_train/margins_2": 4.544721364974976, "step": 351 }, { "epoch": 1.05, "logps_train/policy_1_2": -205.70579528808594, "logps_train/policy_1_l": -193.31265258789062, "logps_train/policy_1_w": -106.37285614013672, "logps_train/policy_2_2": -125.72307586669922, "logps_train/policy_2_w": -192.9306640625, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -2.1197495460510254, "rewards_train/1-l": -3.111611843109131, "rewards_train/1-w": 3.356463670730591, "rewards_train/2-2": 3.1726624965667725, "rewards_train/2-w": -1.858691930770874, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.468075513839722, "rewards_train/margins_1": 5.476213216781616, "rewards_train/margins_2": 5.0313544273376465, "step": 351 }, { "epoch": 1.05, "learning_rate": 2.5e-06, "loss": 0.6734, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -173.696533203125, "logps_train/policy_1_l": -143.19479370117188, "logps_train/policy_1_w": -144.82278442382812, "logps_train/policy_2_2": -120.64216613769531, "logps_train/policy_2_w": -204.7255859375, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.6805909872055054, "rewards_train/1-l": -1.9967741966247559, "rewards_train/1-w": 2.5282673835754395, "rewards_train/2-2": 2.9131269454956055, "rewards_train/2-w": -1.341698408126831, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.525041580200195, "rewards_train/margins_1": 3.208858370780945, "rewards_train/margins_2": 4.2548253536224365, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -191.8589630126953, "logps_train/policy_1_l": -168.5662841796875, "logps_train/policy_1_w": -126.29354095458984, "logps_train/policy_2_2": -132.2276611328125, "logps_train/policy_2_w": -190.49581909179688, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.691755771636963, "rewards_train/1-l": -2.457213878631592, "rewards_train/1-w": 3.3007242679595947, "rewards_train/2-2": 2.4155147075653076, "rewards_train/2-w": -1.0171594619750977, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.7579381465911865, "rewards_train/margins_1": 4.992480039596558, "rewards_train/margins_2": 3.4326741695404053, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -172.08261108398438, "logps_train/policy_1_l": -168.76821899414062, "logps_train/policy_1_w": -124.72065734863281, "logps_train/policy_2_2": -115.97325134277344, "logps_train/policy_2_w": -197.284423828125, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.9000580906867981, "rewards_train/1-l": -1.9758946895599365, "rewards_train/1-w": 2.9814507961273193, "rewards_train/2-2": 2.565760612487793, "rewards_train/2-w": -1.8835206031799316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.957345485687256, "rewards_train/margins_1": 3.8815088868141174, "rewards_train/margins_2": 4.449281215667725, "step": 352 }, { "epoch": 1.05, "logps_train/policy_1_2": -234.42173767089844, "logps_train/policy_1_l": -162.887451171875, "logps_train/policy_1_w": -126.27388763427734, "logps_train/policy_2_2": -161.16708374023438, "logps_train/policy_2_w": -186.08950805664062, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.7749848365783691, "rewards_train/1-l": -1.9919685125350952, "rewards_train/1-w": 3.3374552726745605, "rewards_train/2-2": 3.4121975898742676, "rewards_train/2-w": -0.6257463693618774, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.329423785209656, "rewards_train/margins_1": 5.11244010925293, "rewards_train/margins_2": 4.037943959236145, "step": 352 }, { "epoch": 1.06, "logps_train/policy_1_2": -166.20481872558594, "logps_train/policy_1_l": -200.8269805908203, "logps_train/policy_1_w": -123.19855499267578, "logps_train/policy_2_2": -112.48770904541016, "logps_train/policy_2_w": -199.0628662109375, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -0.45544368028640747, "rewards_train/1-l": -3.1280102729797363, "rewards_train/1-w": 2.743034839630127, "rewards_train/2-2": 2.8892176151275635, "rewards_train/2-w": -2.104332447052002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.871045112609863, "rewards_train/margins_1": 3.1984785199165344, "rewards_train/margins_2": 4.993550062179565, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -248.15835571289062, "logps_train/policy_1_l": -165.92022705078125, "logps_train/policy_1_w": -101.92547607421875, "logps_train/policy_2_2": -145.3223419189453, "logps_train/policy_2_w": -189.31735229492188, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -3.1236495971679688, "rewards_train/1-l": -2.5024728775024414, "rewards_train/1-w": 3.1449522972106934, "rewards_train/2-2": 3.3525309562683105, "rewards_train/2-w": -1.9500939846038818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.647425174713135, "rewards_train/margins_1": 6.268601894378662, "rewards_train/margins_2": 5.302624940872192, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -146.18211364746094, "logps_train/policy_1_l": -152.95220947265625, "logps_train/policy_1_w": -135.22113037109375, "logps_train/policy_2_2": -100.70236206054688, "logps_train/policy_2_w": -193.03866577148438, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -0.43129679560661316, "rewards_train/1-l": -1.7117254734039307, "rewards_train/1-w": 2.576910972595215, "rewards_train/2-2": 2.3434360027313232, "rewards_train/2-w": -1.149571180343628, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.2886364459991455, "rewards_train/margins_1": 3.008207768201828, "rewards_train/margins_2": 3.493007183074951, "step": 353 }, { "epoch": 1.06, "logps_train/policy_1_2": -156.5914306640625, "logps_train/policy_1_l": -161.9834747314453, "logps_train/policy_1_w": -103.0667724609375, "logps_train/policy_2_2": -95.60828399658203, "logps_train/policy_2_w": -161.69110107421875, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.1194934844970703, "rewards_train/1-l": -2.1167073249816895, "rewards_train/1-w": 2.22867488861084, "rewards_train/2-2": 2.8477652072906494, "rewards_train/2-w": -1.5439138412475586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.345382213592529, "rewards_train/margins_1": 3.34816837310791, "rewards_train/margins_2": 4.391679048538208, "step": 353 }, { "epoch": 1.06, "learning_rate": 2.4753023462963363e-06, "loss": 0.835, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -215.41656494140625, "logps_train/policy_1_l": -205.90176391601562, "logps_train/policy_1_w": -145.88308715820312, "logps_train/policy_2_2": -149.485595703125, "logps_train/policy_2_w": -218.0377197265625, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.4592344760894775, "rewards_train/1-l": -2.494180202484131, "rewards_train/1-w": 2.940988779067993, "rewards_train/2-2": 3.1807379722595215, "rewards_train/2-w": -1.7772103548049927, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.435168981552124, "rewards_train/margins_1": 4.400223255157471, "rewards_train/margins_2": 4.957948327064514, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -114.137939453125, "logps_train/policy_1_l": -144.6838836669922, "logps_train/policy_1_w": -89.53125, "logps_train/policy_2_2": -80.88101959228516, "logps_train/policy_2_w": -132.15960693359375, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -0.23312994837760925, "rewards_train/1-l": -2.2913384437561035, "rewards_train/1-w": 2.071484088897705, "rewards_train/2-2": 1.9892420768737793, "rewards_train/2-w": -0.747991681098938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.362822532653809, "rewards_train/margins_1": 2.3046140372753143, "rewards_train/margins_2": 2.7372337579727173, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -238.50831604003906, "logps_train/policy_1_l": -221.38153076171875, "logps_train/policy_1_w": -141.80392456054688, "logps_train/policy_2_2": -156.38287353515625, "logps_train/policy_2_w": -227.61795043945312, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -2.064894676208496, "rewards_train/1-l": -2.4520678520202637, "rewards_train/1-w": 3.5823025703430176, "rewards_train/2-2": 3.214837074279785, "rewards_train/2-w": -1.5879671573638916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.034370422363281, "rewards_train/margins_1": 5.647197246551514, "rewards_train/margins_2": 4.802804231643677, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -138.06996154785156, "logps_train/policy_1_l": -164.81883239746094, "logps_train/policy_1_w": -102.67774200439453, "logps_train/policy_2_2": -89.806640625, "logps_train/policy_2_w": -150.42652893066406, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -1.0857079029083252, "rewards_train/1-l": -1.8722269535064697, "rewards_train/1-w": 2.1486809253692627, "rewards_train/2-2": 2.1998047828674316, "rewards_train/2-w": -0.9393324851989746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.020907878875732, "rewards_train/margins_1": 3.234388828277588, "rewards_train/margins_2": 3.1391372680664062, "step": 354 }, { "epoch": 1.06, "logps_train/policy_1_2": -232.17454528808594, "logps_train/policy_1_l": -224.75782775878906, "logps_train/policy_1_w": -169.7661895751953, "logps_train/policy_2_2": -147.15814208984375, "logps_train/policy_2_w": -264.7071838378906, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -242.0, "rewards_train/1-2": -2.4238028526306152, "rewards_train/1-l": -2.140528678894043, "rewards_train/1-w": 3.804727554321289, "rewards_train/2-2": 3.27852201461792, "rewards_train/2-w": -2.2918591499328613, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.945256233215332, "rewards_train/margins_1": 6.228530406951904, "rewards_train/margins_2": 5.570381164550781, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -216.299072265625, "logps_train/policy_1_l": -199.73788452148438, "logps_train/policy_1_w": -132.3935089111328, "logps_train/policy_2_2": -134.4165496826172, "logps_train/policy_2_w": -226.06533813476562, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.6541261672973633, "rewards_train/1-l": -2.437070369720459, "rewards_train/1-w": 3.577446460723877, "rewards_train/2-2": 3.4024853706359863, "rewards_train/2-w": -2.029971122741699, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.014516830444336, "rewards_train/margins_1": 5.23157262802124, "rewards_train/margins_2": 5.4324564933776855, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -160.83279418945312, "logps_train/policy_1_l": -167.13070678710938, "logps_train/policy_1_w": -122.23402404785156, "logps_train/policy_2_2": -106.99423217773438, "logps_train/policy_2_w": -185.5009765625, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.9511511325836182, "rewards_train/1-l": -2.3341166973114014, "rewards_train/1-w": 2.7548201084136963, "rewards_train/2-2": 2.5196685791015625, "rewards_train/2-w": -1.2704098224639893, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.088936805725098, "rewards_train/margins_1": 3.7059712409973145, "rewards_train/margins_2": 3.7900784015655518, "step": 355 }, { "epoch": 1.06, "logps_train/policy_1_2": -204.07664489746094, "logps_train/policy_1_l": -221.85508728027344, "logps_train/policy_1_w": -138.4366455078125, "logps_train/policy_2_2": -148.21080017089844, "logps_train/policy_2_w": -211.81739807128906, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -0.6842266917228699, "rewards_train/1-l": -3.115880012512207, "rewards_train/1-w": 3.4809439182281494, "rewards_train/2-2": 2.975795030593872, "rewards_train/2-w": -1.4216806888580322, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.5968239307403564, "rewards_train/margins_1": 4.165170609951019, "rewards_train/margins_2": 4.397475719451904, "step": 355 }, { "epoch": 1.07, "learning_rate": 2.4506071030401345e-06, "loss": 0.6133, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -153.27175903320312, "logps_train/policy_1_l": -146.08706665039062, "logps_train/policy_1_w": -88.63798522949219, "logps_train/policy_2_2": -115.62556457519531, "logps_train/policy_2_w": -126.3697738647461, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": -0.2869420349597931, "rewards_train/1-l": -2.1803855895996094, "rewards_train/1-w": 2.1291701793670654, "rewards_train/2-2": 2.2653732299804688, "rewards_train/2-w": -0.4096340835094452, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.309555768966675, "rewards_train/margins_1": 2.4161122143268585, "rewards_train/margins_2": 2.675007313489914, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -226.20135498046875, "logps_train/policy_1_l": -200.30892944335938, "logps_train/policy_1_w": -140.83648681640625, "logps_train/policy_2_2": -163.77593994140625, "logps_train/policy_2_w": -216.8936767578125, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.1920123100280762, "rewards_train/1-l": -2.509798526763916, "rewards_train/1-w": 3.3280701637268066, "rewards_train/2-2": 3.1227967739105225, "rewards_train/2-w": -1.6331188678741455, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.837868690490723, "rewards_train/margins_1": 4.520082473754883, "rewards_train/margins_2": 4.755915641784668, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -192.23721313476562, "logps_train/policy_1_l": -147.22921752929688, "logps_train/policy_1_w": -123.83509063720703, "logps_train/policy_2_2": -126.14678955078125, "logps_train/policy_2_w": -184.03956604003906, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.0171781778335571, "rewards_train/1-l": -1.5211641788482666, "rewards_train/1-w": 3.257897138595581, "rewards_train/2-2": 3.345085620880127, "rewards_train/2-w": -0.6625505685806274, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.779061317443848, "rewards_train/margins_1": 4.275075316429138, "rewards_train/margins_2": 4.007636189460754, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -222.16165161132812, "logps_train/policy_1_l": -185.96292114257812, "logps_train/policy_1_w": -115.9454574584961, "logps_train/policy_2_2": -148.79544067382812, "logps_train/policy_2_w": -181.20462036132812, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.2974143028259277, "rewards_train/1-l": -1.9177770614624023, "rewards_train/1-w": 2.9204936027526855, "rewards_train/2-2": 3.3110814094543457, "rewards_train/2-w": -1.1372588872909546, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.838270664215088, "rewards_train/margins_1": 4.217907905578613, "rewards_train/margins_2": 4.4483402967453, "step": 356 }, { "epoch": 1.07, "logps_train/policy_1_2": -165.0605010986328, "logps_train/policy_1_l": -180.78912353515625, "logps_train/policy_1_w": -128.42230224609375, "logps_train/policy_2_2": -107.60824584960938, "logps_train/policy_2_w": -199.2758331298828, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.5164012312889099, "rewards_train/1-l": -2.5449271202087402, "rewards_train/1-w": 3.2724173069000244, "rewards_train/2-2": 3.198354721069336, "rewards_train/2-w": -1.1031686067581177, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.817344427108765, "rewards_train/margins_1": 3.7888185381889343, "rewards_train/margins_2": 4.301523327827454, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -114.99417114257812, "logps_train/policy_1_l": -124.65766906738281, "logps_train/policy_1_w": -63.550289154052734, "logps_train/policy_2_2": -84.12662506103516, "logps_train/policy_2_w": -97.33720397949219, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -81.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": -0.22754231095314026, "rewards_train/1-l": -2.3270950317382812, "rewards_train/1-w": 1.7728736400604248, "rewards_train/2-2": 1.9099938869476318, "rewards_train/2-w": -0.449150025844574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.099968671798706, "rewards_train/margins_1": 2.000415951013565, "rewards_train/margins_2": 2.359143912792206, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -204.4392852783203, "logps_train/policy_1_l": -230.92662048339844, "logps_train/policy_1_w": -103.45661926269531, "logps_train/policy_2_2": -128.51919555664062, "logps_train/policy_2_w": -168.2747802734375, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.9029134511947632, "rewards_train/1-l": -3.0390987396240234, "rewards_train/1-w": 2.8411059379577637, "rewards_train/2-2": 3.2504234313964844, "rewards_train/2-w": -1.1380243301391602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.880204677581787, "rewards_train/margins_1": 4.744019389152527, "rewards_train/margins_2": 4.3884477615356445, "step": 357 }, { "epoch": 1.07, "logps_train/policy_1_2": -139.88604736328125, "logps_train/policy_1_l": -153.34182739257812, "logps_train/policy_1_w": -100.77742004394531, "logps_train/policy_2_2": -93.61892700195312, "logps_train/policy_2_w": -154.22874450683594, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.7409496903419495, "rewards_train/1-l": -2.0224649906158447, "rewards_train/1-w": 2.1570236682891846, "rewards_train/2-2": 2.326387882232666, "rewards_train/2-w": -1.237718105316162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.179488658905029, "rewards_train/margins_1": 2.897973358631134, "rewards_train/margins_2": 3.564105987548828, "step": 357 }, { "epoch": 1.07, "learning_rate": 2.4259166804436008e-06, "loss": 0.6794, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -161.19992065429688, "logps_train/policy_1_l": -144.88790893554688, "logps_train/policy_1_w": -100.61064147949219, "logps_train/policy_2_2": -122.93009948730469, "logps_train/policy_2_w": -144.46170043945312, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.14987632632255554, "rewards_train/1-l": -1.3172576427459717, "rewards_train/1-w": 2.1936233043670654, "rewards_train/2-2": 2.556697368621826, "rewards_train/2-w": -0.5025169849395752, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.510880947113037, "rewards_train/margins_1": 2.343499630689621, "rewards_train/margins_2": 3.0592143535614014, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -154.62571716308594, "logps_train/policy_1_l": -215.70314025878906, "logps_train/policy_1_w": -127.7138671875, "logps_train/policy_2_2": -94.5472183227539, "logps_train/policy_2_w": -207.6490478515625, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.0365948677062988, "rewards_train/1-l": -2.464845895767212, "rewards_train/1-w": 3.181151866912842, "rewards_train/2-2": 2.5234031677246094, "rewards_train/2-w": -1.795910120010376, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.645997762680054, "rewards_train/margins_1": 4.217746734619141, "rewards_train/margins_2": 4.319313287734985, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -201.07778930664062, "logps_train/policy_1_l": -145.50164794921875, "logps_train/policy_1_w": -117.79161071777344, "logps_train/policy_2_2": -136.11053466796875, "logps_train/policy_2_w": -188.8941192626953, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.9679356813430786, "rewards_train/1-l": -1.3661075830459595, "rewards_train/1-w": 2.880995035171509, "rewards_train/2-2": 2.8586721420288086, "rewards_train/2-w": -1.5798414945602417, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.247102618217468, "rewards_train/margins_1": 3.8489307165145874, "rewards_train/margins_2": 4.43851363658905, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -199.21380615234375, "logps_train/policy_1_l": -156.28677368164062, "logps_train/policy_1_w": -103.87101745605469, "logps_train/policy_2_2": -140.4742889404297, "logps_train/policy_2_w": -162.09335327148438, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.9428648948669434, "rewards_train/1-l": -1.6346834897994995, "rewards_train/1-w": 3.101179599761963, "rewards_train/2-2": 2.5348949432373047, "rewards_train/2-w": -0.27769505977630615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.735863089561462, "rewards_train/margins_1": 4.044044494628906, "rewards_train/margins_2": 2.812590003013611, "step": 358 }, { "epoch": 1.07, "logps_train/policy_1_2": -137.82322692871094, "logps_train/policy_1_l": -127.52708435058594, "logps_train/policy_1_w": -71.57035827636719, "logps_train/policy_2_2": -84.5595703125, "logps_train/policy_2_w": -125.25728607177734, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": -1.2193353176116943, "rewards_train/1-l": -1.8763422966003418, "rewards_train/1-w": 2.2181594371795654, "rewards_train/2-2": 2.2124505043029785, "rewards_train/2-w": -0.9878385066986084, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.094501733779907, "rewards_train/margins_1": 3.4374947547912598, "rewards_train/margins_2": 3.200289011001587, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -92.2194595336914, "logps_train/policy_1_l": -129.44827270507812, "logps_train/policy_1_w": -95.8406982421875, "logps_train/policy_2_2": -64.70812225341797, "logps_train/policy_2_w": -153.24978637695312, "logps_train/ref_1_2": -91.5, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -81.5, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -0.06999287754297256, "rewards_train/1-l": -1.548636555671692, "rewards_train/1-w": 2.779796600341797, "rewards_train/2-2": 1.683093786239624, "rewards_train/2-w": -1.0630632638931274, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.328433156013489, "rewards_train/margins_1": 2.8497894778847694, "rewards_train/margins_2": 2.7461570501327515, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -230.37464904785156, "logps_train/policy_1_l": -183.53060913085938, "logps_train/policy_1_w": -123.06757354736328, "logps_train/policy_2_2": -147.37673950195312, "logps_train/policy_2_w": -214.91665649414062, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.6872684955596924, "rewards_train/1-l": -2.0557479858398438, "rewards_train/1-w": 3.6768364906311035, "rewards_train/2-2": 3.5877161026000977, "rewards_train/2-w": -1.676040768623352, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.732584476470947, "rewards_train/margins_1": 5.364104986190796, "rewards_train/margins_2": 5.26375687122345, "step": 359 }, { "epoch": 1.07, "logps_train/policy_1_2": -135.58120727539062, "logps_train/policy_1_l": -134.40383911132812, "logps_train/policy_1_w": -103.87895202636719, "logps_train/policy_2_2": -88.08679962158203, "logps_train/policy_2_w": -145.318115234375, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.8051910996437073, "rewards_train/1-l": -1.4810090065002441, "rewards_train/1-w": 2.8507771492004395, "rewards_train/2-2": 2.1364376544952393, "rewards_train/2-w": 0.04865729808807373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.331786155700684, "rewards_train/margins_1": 3.6559682488441467, "rewards_train/margins_2": 2.0877803564071655, "step": 359 }, { "epoch": 1.08, "learning_rate": 2.4012334882484554e-06, "loss": 0.8193, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -144.02520751953125, "logps_train/policy_1_l": -150.4962158203125, "logps_train/policy_1_w": -111.22854614257812, "logps_train/policy_2_2": -98.20365905761719, "logps_train/policy_2_w": -163.34310913085938, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.3368961215019226, "rewards_train/1-l": -1.7667590379714966, "rewards_train/1-w": 2.9597630500793457, "rewards_train/2-2": 2.5265092849731445, "rewards_train/2-w": -0.4874844551086426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.726522088050842, "rewards_train/margins_1": 3.2966591715812683, "rewards_train/margins_2": 3.013993740081787, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -124.78267669677734, "logps_train/policy_1_l": -99.52316284179688, "logps_train/policy_1_w": -54.041290283203125, "logps_train/policy_2_2": -73.33753967285156, "logps_train/policy_2_w": -98.04786682128906, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -71.5, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -88.0, "rewards_train/1-2": -1.3774864673614502, "rewards_train/1-l": -1.3636438846588135, "rewards_train/1-w": 1.7255584001541138, "rewards_train/2-2": 2.0894877910614014, "rewards_train/2-w": -1.018458604812622, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.0892022848129272, "rewards_train/margins_1": 3.103044867515564, "rewards_train/margins_2": 3.1079463958740234, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -153.16445922851562, "logps_train/policy_1_l": -131.56500244140625, "logps_train/policy_1_w": -99.36862182617188, "logps_train/policy_2_2": -99.72216033935547, "logps_train/policy_2_w": -159.60195922851562, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.9125385284423828, "rewards_train/1-l": -1.3966851234436035, "rewards_train/1-w": 2.9855990409851074, "rewards_train/2-2": 2.4281744956970215, "rewards_train/2-w": -0.7326574325561523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.382284164428711, "rewards_train/margins_1": 3.8981375694274902, "rewards_train/margins_2": 3.160831928253174, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -224.60617065429688, "logps_train/policy_1_l": -205.82418823242188, "logps_train/policy_1_w": -155.17202758789062, "logps_train/policy_2_2": -157.16433715820312, "logps_train/policy_2_w": -222.60406494140625, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.6895244717597961, "rewards_train/1-l": -2.3176729679107666, "rewards_train/1-w": 3.3263516426086426, "rewards_train/2-2": 3.5140345096588135, "rewards_train/2-w": -1.0143139362335205, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.644024610519409, "rewards_train/margins_1": 4.015876114368439, "rewards_train/margins_2": 4.528348445892334, "step": 360 }, { "epoch": 1.08, "logps_train/policy_1_2": -178.39462280273438, "logps_train/policy_1_l": -151.39222717285156, "logps_train/policy_1_w": -107.88453674316406, "logps_train/policy_2_2": -116.1655044555664, "logps_train/policy_2_w": -161.36846923828125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.26993027329444885, "rewards_train/1-l": -1.7634427547454834, "rewards_train/1-w": 2.666624069213867, "rewards_train/2-2": 3.618605375289917, "rewards_train/2-w": -0.4499322772026062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.430066823959351, "rewards_train/margins_1": 2.936554342508316, "rewards_train/margins_2": 4.068537652492523, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -179.58883666992188, "logps_train/policy_1_l": -177.56996154785156, "logps_train/policy_1_w": -95.63861083984375, "logps_train/policy_2_2": -112.80058288574219, "logps_train/policy_2_w": -157.20689392089844, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -1.0600545406341553, "rewards_train/1-l": -2.116224527359009, "rewards_train/1-w": 3.0791072845458984, "rewards_train/2-2": 3.0924997329711914, "rewards_train/2-w": -0.8402209877967834, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.195331811904907, "rewards_train/margins_1": 4.139161825180054, "rewards_train/margins_2": 3.932720720767975, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -184.08908081054688, "logps_train/policy_1_l": -135.5564422607422, "logps_train/policy_1_w": -104.53363037109375, "logps_train/policy_2_2": -121.42757415771484, "logps_train/policy_2_w": -179.77037048339844, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.9717978835105896, "rewards_train/1-l": -1.0684374570846558, "rewards_train/1-w": 2.9880428314208984, "rewards_train/2-2": 2.9952316284179688, "rewards_train/2-w": -1.5684432983398438, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.056480288505554, "rewards_train/margins_1": 3.959840714931488, "rewards_train/margins_2": 4.5636749267578125, "step": 361 }, { "epoch": 1.08, "logps_train/policy_1_2": -185.46713256835938, "logps_train/policy_1_l": -129.53050231933594, "logps_train/policy_1_w": -125.69783020019531, "logps_train/policy_2_2": -111.97525024414062, "logps_train/policy_2_w": -201.943603515625, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.7951511144638062, "rewards_train/1-l": -1.5878167152404785, "rewards_train/1-w": 3.354825973510742, "rewards_train/2-2": 2.782162666320801, "rewards_train/2-w": -1.7369377613067627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.942642688751221, "rewards_train/margins_1": 5.149977087974548, "rewards_train/margins_2": 4.5191004276275635, "step": 361 }, { "epoch": 1.08, "learning_rate": 2.376559935490743e-06, "loss": 0.6733, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -116.16935729980469, "logps_train/policy_1_l": -102.77749633789062, "logps_train/policy_1_w": -92.91366577148438, "logps_train/policy_2_2": -77.68180847167969, "logps_train/policy_2_w": -141.40493774414062, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -89.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -0.43066203594207764, "rewards_train/1-l": -1.3502893447875977, "rewards_train/1-w": 2.957070827484131, "rewards_train/2-2": 2.233276605606079, "rewards_train/2-w": -0.30357977747917175, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.3073601722717285, "rewards_train/margins_1": 3.3877328634262085, "rewards_train/margins_2": 2.536856383085251, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -148.511474609375, "logps_train/policy_1_l": -157.31820678710938, "logps_train/policy_1_w": -112.3152847290039, "logps_train/policy_2_2": -96.56019592285156, "logps_train/policy_2_w": -166.96951293945312, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.6339584589004517, "rewards_train/1-l": -1.4876810312271118, "rewards_train/1-w": 3.0677878856658936, "rewards_train/2-2": 2.520542621612549, "rewards_train/2-w": -0.712379515171051, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.555468916893005, "rewards_train/margins_1": 3.701746344566345, "rewards_train/margins_2": 3.2329221367836, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -117.7647476196289, "logps_train/policy_1_l": -130.78045654296875, "logps_train/policy_1_w": -79.35273742675781, "logps_train/policy_2_2": -73.66083526611328, "logps_train/policy_2_w": -147.64556884765625, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -107.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -1.0952246189117432, "rewards_train/1-l": -1.6086108684539795, "rewards_train/1-w": 2.7512500286102295, "rewards_train/2-2": 1.9407527446746826, "rewards_train/2-w": -1.2497127056121826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.359860897064209, "rewards_train/margins_1": 3.8464746475219727, "rewards_train/margins_2": 3.1904654502868652, "step": 362 }, { "epoch": 1.08, "logps_train/policy_1_2": -181.5117950439453, "logps_train/policy_1_l": -212.2000732421875, "logps_train/policy_1_w": -143.83828735351562, "logps_train/policy_2_2": -132.65475463867188, "logps_train/policy_2_w": -202.95083618164062, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -0.23789876699447632, "rewards_train/1-l": -1.8472033739089966, "rewards_train/1-w": 3.3780362606048584, "rewards_train/2-2": 2.8601107597351074, "rewards_train/2-w": -0.6188139319419861, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.225239634513855, "rewards_train/margins_1": 3.6159350275993347, "rewards_train/margins_2": 3.4789246916770935, "step": 362 }, { "epoch": 1.09, "logps_train/policy_1_2": -163.08767700195312, "logps_train/policy_1_l": -120.70262145996094, "logps_train/policy_1_w": -69.58377075195312, "logps_train/policy_2_2": -95.57521057128906, "logps_train/policy_2_w": -116.09646606445312, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -93.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": -1.509549617767334, "rewards_train/1-l": -1.4230455160140991, "rewards_train/1-w": 2.3242406845092773, "rewards_train/2-2": 2.7254867553710938, "rewards_train/2-w": -0.6348417401313782, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.7472862005233765, "rewards_train/margins_1": 3.8337903022766113, "rewards_train/margins_2": 3.360328495502472, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -147.55152893066406, "logps_train/policy_1_l": -121.61418914794922, "logps_train/policy_1_w": -85.31112670898438, "logps_train/policy_2_2": -91.75865173339844, "logps_train/policy_2_w": -129.638671875, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -1.081177830696106, "rewards_train/1-l": -1.678362488746643, "rewards_train/1-w": 2.475137710571289, "rewards_train/2-2": 2.5369768142700195, "rewards_train/2-w": -0.2810540199279785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.153500199317932, "rewards_train/margins_1": 3.556315541267395, "rewards_train/margins_2": 2.818030834197998, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -129.8193817138672, "logps_train/policy_1_l": -140.4494171142578, "logps_train/policy_1_w": -85.13185119628906, "logps_train/policy_2_2": -76.58329772949219, "logps_train/policy_2_w": -135.02166748046875, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": -1.0458062887191772, "rewards_train/1-l": -1.6756315231323242, "rewards_train/1-w": 2.515721321105957, "rewards_train/2-2": 2.6661813259124756, "rewards_train/2-w": -0.6670099496841431, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.191352844238281, "rewards_train/margins_1": 3.5615276098251343, "rewards_train/margins_2": 3.3331912755966187, "step": 363 }, { "epoch": 1.09, "logps_train/policy_1_2": -204.5082550048828, "logps_train/policy_1_l": -249.73147583007812, "logps_train/policy_1_w": -118.34919738769531, "logps_train/policy_2_2": -139.52532958984375, "logps_train/policy_2_w": -190.73568725585938, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -0.5357872247695923, "rewards_train/1-l": -2.342679500579834, "rewards_train/1-w": 2.977776050567627, "rewards_train/2-2": 3.281452178955078, "rewards_train/2-w": -1.5667316913604736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.320455551147461, "rewards_train/margins_1": 3.5135632753372192, "rewards_train/margins_2": 4.848183870315552, "step": 363 }, { "epoch": 1.09, "learning_rate": 2.3518984302657146e-06, "loss": 0.6883, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -144.30230712890625, "logps_train/policy_1_l": -132.149658203125, "logps_train/policy_1_w": -120.22313690185547, "logps_train/policy_2_2": -87.0643539428711, "logps_train/policy_2_w": -185.83644104003906, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.134527564048767, "rewards_train/1-l": -1.8256114721298218, "rewards_train/1-w": 2.9597182273864746, "rewards_train/2-2": 2.1976659297943115, "rewards_train/2-w": -1.1879405975341797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.785329699516296, "rewards_train/margins_1": 4.094245791435242, "rewards_train/margins_2": 3.385606527328491, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -135.49569702148438, "logps_train/policy_1_l": -144.6507110595703, "logps_train/policy_1_w": -78.26123809814453, "logps_train/policy_2_2": -78.07977294921875, "logps_train/policy_2_w": -126.10260009765625, "logps_train/ref_1_2": -119.5, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -99.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": -1.6218349933624268, "rewards_train/1-l": -1.738264799118042, "rewards_train/1-w": 2.0625479221343994, "rewards_train/2-2": 1.8111631870269775, "rewards_train/2-w": -0.7653381824493408, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8008127212524414, "rewards_train/margins_1": 3.684382915496826, "rewards_train/margins_2": 2.5765013694763184, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -172.35528564453125, "logps_train/policy_1_l": -155.17242431640625, "logps_train/policy_1_w": -88.80888366699219, "logps_train/policy_2_2": -108.97062683105469, "logps_train/policy_2_w": -136.41189575195312, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -0.7812298536300659, "rewards_train/1-l": -2.0811591148376465, "rewards_train/1-w": 2.1894235610961914, "rewards_train/2-2": 2.982234001159668, "rewards_train/2-w": -1.0888092517852783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.270582675933838, "rewards_train/margins_1": 2.9706534147262573, "rewards_train/margins_2": 4.071043252944946, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -159.13720703125, "logps_train/policy_1_l": -154.74488830566406, "logps_train/policy_1_w": -89.12936401367188, "logps_train/policy_2_2": -99.82798767089844, "logps_train/policy_2_w": -162.83824157714844, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.3164560794830322, "rewards_train/1-l": -1.8614027500152588, "rewards_train/1-w": 2.590384006500244, "rewards_train/2-2": 2.2945446968078613, "rewards_train/2-w": -1.711949110031128, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.451786756515503, "rewards_train/margins_1": 3.9068400859832764, "rewards_train/margins_2": 4.006493806838989, "step": 364 }, { "epoch": 1.09, "logps_train/policy_1_2": -163.12359619140625, "logps_train/policy_1_l": -139.63070678710938, "logps_train/policy_1_w": -111.30564880371094, "logps_train/policy_2_2": -108.48834228515625, "logps_train/policy_2_w": -159.95162963867188, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.5236870646476746, "rewards_train/1-l": -1.1958826780319214, "rewards_train/1-w": 2.979884147644043, "rewards_train/2-2": 2.9519474506378174, "rewards_train/2-w": -0.09946021437644958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.175766825675964, "rewards_train/margins_1": 3.5035712122917175, "rewards_train/margins_2": 3.051407665014267, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -214.02508544921875, "logps_train/policy_1_l": -222.0554962158203, "logps_train/policy_1_w": -157.97628784179688, "logps_train/policy_2_2": -144.75332641601562, "logps_train/policy_2_w": -224.22219848632812, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -0.8023131489753723, "rewards_train/1-l": -2.074885368347168, "rewards_train/1-w": 3.35471510887146, "rewards_train/2-2": 3.7338461875915527, "rewards_train/2-w": -0.7808155417442322, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.429600477218628, "rewards_train/margins_1": 4.157028257846832, "rewards_train/margins_2": 4.514661729335785, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -166.3069610595703, "logps_train/policy_1_l": -182.3922119140625, "logps_train/policy_1_w": -119.38162231445312, "logps_train/policy_2_2": -121.26163482666016, "logps_train/policy_2_w": -181.2782745361328, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.10886949300765991, "rewards_train/1-l": -1.9049676656723022, "rewards_train/1-w": 3.2115933895111084, "rewards_train/2-2": 2.7209062576293945, "rewards_train/2-w": -0.49462464451789856, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.116561055183411, "rewards_train/margins_1": 3.3204628825187683, "rewards_train/margins_2": 3.215530902147293, "step": 365 }, { "epoch": 1.09, "logps_train/policy_1_2": -253.7921142578125, "logps_train/policy_1_l": -230.50772094726562, "logps_train/policy_1_w": -156.16688537597656, "logps_train/policy_2_2": -167.12879943847656, "logps_train/policy_2_w": -246.84243774414062, "logps_train/ref_1_2": -243.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -233.0, "rewards_train/1-2": -1.0784293413162231, "rewards_train/1-l": -3.0226473808288574, "rewards_train/1-w": 4.1215925216674805, "rewards_train/2-2": 4.245713233947754, "rewards_train/2-w": -1.3670566082000732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.144239902496338, "rewards_train/margins_1": 5.200021862983704, "rewards_train/margins_2": 5.612769842147827, "step": 365 }, { "epoch": 1.1, "learning_rate": 2.3272513794928055e-06, "loss": 0.6928, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -186.59713745117188, "logps_train/policy_1_l": -135.42251586914062, "logps_train/policy_1_w": -82.52650451660156, "logps_train/policy_2_2": -112.00839233398438, "logps_train/policy_2_w": -138.0721435546875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": -2.0537564754486084, "rewards_train/1-l": -1.725381851196289, "rewards_train/1-w": 2.5062365531921387, "rewards_train/2-2": 3.024453639984131, "rewards_train/2-w": -0.9721556901931763, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.231618404388428, "rewards_train/margins_1": 4.559993028640747, "rewards_train/margins_2": 3.996609330177307, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -156.7421875, "logps_train/policy_1_l": -120.16961669921875, "logps_train/policy_1_w": -87.83092498779297, "logps_train/policy_2_2": -101.02376556396484, "logps_train/policy_2_w": -132.78536987304688, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.7550779581069946, "rewards_train/1-l": -1.5270203351974487, "rewards_train/1-w": 3.0432746410369873, "rewards_train/2-2": 3.019498825073242, "rewards_train/2-w": 0.17204883694648743, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.570294976234436, "rewards_train/margins_1": 3.798352599143982, "rewards_train/margins_2": 2.8474499881267548, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -239.21267700195312, "logps_train/policy_1_l": -170.6937255859375, "logps_train/policy_1_w": -158.68524169921875, "logps_train/policy_2_2": -156.45419311523438, "logps_train/policy_2_w": -257.7899475097656, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": -1.236891746520996, "rewards_train/1-l": -1.4116568565368652, "rewards_train/1-w": 4.339875221252441, "rewards_train/2-2": 3.767472267150879, "rewards_train/2-w": -1.9436434507369995, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.751532077789307, "rewards_train/margins_1": 5.5767669677734375, "rewards_train/margins_2": 5.711115717887878, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -147.8229217529297, "logps_train/policy_1_l": -158.09384155273438, "logps_train/policy_1_w": -113.4779052734375, "logps_train/policy_2_2": -98.97802734375, "logps_train/policy_2_w": -177.70034790039062, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.452996164560318, "rewards_train/1-l": -1.4636807441711426, "rewards_train/1-w": 3.248302936553955, "rewards_train/2-2": 2.337158679962158, "rewards_train/2-w": -0.827846884727478, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.711983680725098, "rewards_train/margins_1": 3.701299101114273, "rewards_train/margins_2": 3.1650055646896362, "step": 366 }, { "epoch": 1.1, "logps_train/policy_1_2": -185.8916473388672, "logps_train/policy_1_l": -203.54034423828125, "logps_train/policy_1_w": -112.99774169921875, "logps_train/policy_2_2": -128.21939086914062, "logps_train/policy_2_w": -183.3826904296875, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.514946699142456, "rewards_train/1-l": -2.0761051177978516, "rewards_train/1-w": 3.0675110816955566, "rewards_train/2-2": 2.9014978408813477, "rewards_train/2-w": -0.929186999797821, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.143616199493408, "rewards_train/margins_1": 3.5824577808380127, "rewards_train/margins_2": 3.8306848406791687, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -219.23826599121094, "logps_train/policy_1_l": -151.15394592285156, "logps_train/policy_1_w": -138.69476318359375, "logps_train/policy_2_2": -140.9278564453125, "logps_train/policy_2_w": -214.43276977539062, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.4980454444885254, "rewards_train/1-l": -1.5910295248031616, "rewards_train/1-w": 3.087653160095215, "rewards_train/2-2": 3.8306527137756348, "rewards_train/2-w": -1.8686668872833252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.6786826848983765, "rewards_train/margins_1": 4.58569860458374, "rewards_train/margins_2": 5.69931960105896, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -210.2506866455078, "logps_train/policy_1_l": -182.68040466308594, "logps_train/policy_1_w": -147.4197998046875, "logps_train/policy_2_2": -134.01846313476562, "logps_train/policy_2_w": -208.70529174804688, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.6816115379333496, "rewards_train/1-l": -1.847386121749878, "rewards_train/1-w": 2.8822386264801025, "rewards_train/2-2": 3.305546283721924, "rewards_train/2-w": -0.94338059425354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.7296247482299805, "rewards_train/margins_1": 4.563850164413452, "rewards_train/margins_2": 4.248926877975464, "step": 367 }, { "epoch": 1.1, "logps_train/policy_1_2": -156.4058837890625, "logps_train/policy_1_l": -139.98593139648438, "logps_train/policy_1_w": -116.13581848144531, "logps_train/policy_2_2": -109.44781494140625, "logps_train/policy_2_w": -176.43267822265625, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.5319944620132446, "rewards_train/1-l": -1.7552340030670166, "rewards_train/1-w": 2.7569260597229004, "rewards_train/2-2": 2.4175233840942383, "rewards_train/2-w": -0.7737359404563904, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.512160062789917, "rewards_train/margins_1": 3.288920521736145, "rewards_train/margins_2": 3.1912593245506287, "step": 367 }, { "epoch": 1.1, "learning_rate": 2.3026211886807205e-06, "loss": 0.6134, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -179.02383422851562, "logps_train/policy_1_l": -162.0640869140625, "logps_train/policy_1_w": -138.9273681640625, "logps_train/policy_2_2": -128.64846801757812, "logps_train/policy_2_w": -215.32545471191406, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": 0.000740543007850647, "rewards_train/1-l": -1.4694955348968506, "rewards_train/1-w": 4.27054500579834, "rewards_train/2-2": 3.1456990242004395, "rewards_train/2-w": -0.4581325948238373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.74004054069519, "rewards_train/margins_1": 4.269804462790489, "rewards_train/margins_2": 3.6038316190242767, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -165.72042846679688, "logps_train/policy_1_l": -172.26255798339844, "logps_train/policy_1_w": -111.64645385742188, "logps_train/policy_2_2": -112.9349365234375, "logps_train/policy_2_w": -171.93727111816406, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -0.9196994304656982, "rewards_train/1-l": -2.4584832191467285, "rewards_train/1-w": 3.1613316535949707, "rewards_train/2-2": 2.82681941986084, "rewards_train/2-w": -0.9070079922676086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.619814872741699, "rewards_train/margins_1": 4.081031084060669, "rewards_train/margins_2": 3.7338274121284485, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -116.98090362548828, "logps_train/policy_1_l": -97.2705307006836, "logps_train/policy_1_w": -63.35435485839844, "logps_train/policy_2_2": -80.20618438720703, "logps_train/policy_2_w": -113.06999206542969, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": -0.5336378216743469, "rewards_train/1-l": -1.5276386737823486, "rewards_train/1-w": 2.4290177822113037, "rewards_train/2-2": 2.036999225616455, "rewards_train/2-w": -0.7573903203010559, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9566564559936523, "rewards_train/margins_1": 2.9626556038856506, "rewards_train/margins_2": 2.794389545917511, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -185.267333984375, "logps_train/policy_1_l": -154.2744140625, "logps_train/policy_1_w": -103.07376098632812, "logps_train/policy_2_2": -124.21980285644531, "logps_train/policy_2_w": -159.0867919921875, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": 0.03205537796020508, "rewards_train/1-l": -1.5299817323684692, "rewards_train/1-w": 2.873288154602051, "rewards_train/2-2": 4.014934062957764, "rewards_train/2-w": -0.7189315557479858, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.40326988697052, "rewards_train/margins_1": 2.8412327766418457, "rewards_train/margins_2": 4.7338656187057495, "step": 368 }, { "epoch": 1.1, "logps_train/policy_1_2": -164.30999755859375, "logps_train/policy_1_l": -127.96905517578125, "logps_train/policy_1_w": -100.41694641113281, "logps_train/policy_2_2": -108.96653747558594, "logps_train/policy_2_w": -151.72341918945312, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -0.356781929731369, "rewards_train/1-l": -1.8615543842315674, "rewards_train/1-w": 3.063774585723877, "rewards_train/2-2": 2.997877597808838, "rewards_train/2-w": -0.06296640634536743, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.925328969955444, "rewards_train/margins_1": 3.420556515455246, "rewards_train/margins_2": 3.0608440041542053, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -173.322021484375, "logps_train/policy_1_l": -178.69544982910156, "logps_train/policy_1_w": -140.08047485351562, "logps_train/policy_2_2": -114.93550872802734, "logps_train/policy_2_w": -212.98867797851562, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -0.8962042331695557, "rewards_train/1-l": -1.9414656162261963, "rewards_train/1-w": 3.1532678604125977, "rewards_train/2-2": 2.8387975692749023, "rewards_train/2-w": -1.28898024559021, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.094733476638794, "rewards_train/margins_1": 4.049472093582153, "rewards_train/margins_2": 4.127777814865112, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -194.98370361328125, "logps_train/policy_1_l": -265.60235595703125, "logps_train/policy_1_w": -135.66329956054688, "logps_train/policy_2_2": -136.35316467285156, "logps_train/policy_2_w": -194.5296630859375, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.08948442339897156, "rewards_train/1-l": -3.71223783493042, "rewards_train/1-w": 3.4469523429870605, "rewards_train/2-2": 3.6234726905822754, "rewards_train/2-w": -0.3842162489891052, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 7.1591901779174805, "rewards_train/margins_1": 3.536436766386032, "rewards_train/margins_2": 4.007688939571381, "step": 369 }, { "epoch": 1.1, "logps_train/policy_1_2": -154.29281616210938, "logps_train/policy_1_l": -229.3157501220703, "logps_train/policy_1_w": -100.93003845214844, "logps_train/policy_2_2": -99.67024230957031, "logps_train/policy_2_w": -145.870849609375, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.3380693793296814, "rewards_train/1-l": -2.7454423904418945, "rewards_train/1-w": 2.4323878288269043, "rewards_train/2-2": 3.198209762573242, "rewards_train/2-w": -0.4089600741863251, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.177830219268799, "rewards_train/margins_1": 2.7704572081565857, "rewards_train/margins_2": 3.6071698367595673, "step": 369 }, { "epoch": 1.11, "learning_rate": 2.278010261692663e-06, "loss": 0.6961, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -172.12374877929688, "logps_train/policy_1_l": -187.91104125976562, "logps_train/policy_1_w": -121.67152404785156, "logps_train/policy_2_2": -111.2186508178711, "logps_train/policy_2_w": -179.26675415039062, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.1686246395111084, "rewards_train/1-l": -2.264540672302246, "rewards_train/1-w": 3.164292335510254, "rewards_train/2-2": 2.6772568225860596, "rewards_train/2-w": -0.8868330717086792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.4288330078125, "rewards_train/margins_1": 4.332916975021362, "rewards_train/margins_2": 3.5640898942947388, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -240.4810333251953, "logps_train/policy_1_l": -217.34568786621094, "logps_train/policy_1_w": -132.4196014404297, "logps_train/policy_2_2": -153.18260192871094, "logps_train/policy_2_w": -213.95359802246094, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.7789640426635742, "rewards_train/1-l": -1.7708489894866943, "rewards_train/1-w": 3.613508462905884, "rewards_train/2-2": 3.534864902496338, "rewards_train/2-w": -0.5812978148460388, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.384357452392578, "rewards_train/margins_1": 5.392472505569458, "rewards_train/margins_2": 4.116162717342377, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -167.2061004638672, "logps_train/policy_1_l": -140.69558715820312, "logps_train/policy_1_w": -117.41697692871094, "logps_train/policy_2_2": -110.48554992675781, "logps_train/policy_2_w": -179.76565551757812, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.9157276749610901, "rewards_train/1-l": -1.3539338111877441, "rewards_train/1-w": 2.9606456756591797, "rewards_train/2-2": 2.6951959133148193, "rewards_train/2-w": -0.9461947083473206, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.314579486846924, "rewards_train/margins_1": 3.8763733506202698, "rewards_train/margins_2": 3.64139062166214, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -173.60354614257812, "logps_train/policy_1_l": -140.81129455566406, "logps_train/policy_1_w": -119.88055419921875, "logps_train/policy_2_2": -115.29818725585938, "logps_train/policy_2_w": -189.73500061035156, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -0.622073233127594, "rewards_train/1-l": -1.7647039890289307, "rewards_train/1-w": 3.863506555557251, "rewards_train/2-2": 3.0100245475769043, "rewards_train/2-w": -0.773500382900238, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.628210544586182, "rewards_train/margins_1": 4.485579788684845, "rewards_train/margins_2": 3.7835249304771423, "step": 370 }, { "epoch": 1.11, "logps_train/policy_1_2": -210.12815856933594, "logps_train/policy_1_l": -155.4661865234375, "logps_train/policy_1_w": -94.6542739868164, "logps_train/policy_2_2": -131.31764221191406, "logps_train/policy_2_w": -151.63485717773438, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -1.2050037384033203, "rewards_train/1-l": -1.6684924364089966, "rewards_train/1-w": 2.1999049186706543, "rewards_train/2-2": 3.6862049102783203, "rewards_train/2-w": -1.0384856462478638, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.868397355079651, "rewards_train/margins_1": 3.4049086570739746, "rewards_train/margins_2": 4.724690556526184, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -173.14950561523438, "logps_train/policy_1_l": -197.98548889160156, "logps_train/policy_1_w": -93.14399719238281, "logps_train/policy_2_2": -116.01818084716797, "logps_train/policy_2_w": -137.90585327148438, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -0.6878015995025635, "rewards_train/1-l": -2.8441548347473145, "rewards_train/1-w": 2.4373583793640137, "rewards_train/2-2": 3.0708377361297607, "rewards_train/2-w": -0.35031503438949585, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.281513214111328, "rewards_train/margins_1": 3.125159978866577, "rewards_train/margins_2": 3.4211527705192566, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -177.69256591796875, "logps_train/policy_1_l": -129.72238159179688, "logps_train/policy_1_w": -125.96273803710938, "logps_train/policy_2_2": -109.39453887939453, "logps_train/policy_2_w": -195.4012451171875, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.320428729057312, "rewards_train/1-l": -1.7983615398406982, "rewards_train/1-w": 3.0234527587890625, "rewards_train/2-2": 3.021679401397705, "rewards_train/2-w": -1.6822141408920288, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.821814298629761, "rewards_train/margins_1": 4.3438814878463745, "rewards_train/margins_2": 4.703893542289734, "step": 371 }, { "epoch": 1.11, "logps_train/policy_1_2": -201.2456512451172, "logps_train/policy_1_l": -130.14120483398438, "logps_train/policy_1_w": -90.78376770019531, "logps_train/policy_2_2": -131.31613159179688, "logps_train/policy_2_w": -141.85830688476562, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -1.6530817747116089, "rewards_train/1-l": -1.769978404045105, "rewards_train/1-w": 2.7073655128479004, "rewards_train/2-2": 3.271902561187744, "rewards_train/2-w": -0.6959864497184753, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.477343916893005, "rewards_train/margins_1": 4.360447287559509, "rewards_train/margins_2": 3.9678890109062195, "step": 371 }, { "epoch": 1.11, "learning_rate": 2.253421000511721e-06, "loss": 0.7252, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -163.07855224609375, "logps_train/policy_1_l": -175.53955078125, "logps_train/policy_1_w": -96.10967254638672, "logps_train/policy_2_2": -114.88882446289062, "logps_train/policy_2_w": -149.7078399658203, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.4203556776046753, "rewards_train/1-l": -2.483351230621338, "rewards_train/1-w": 2.96559476852417, "rewards_train/2-2": 2.736117362976074, "rewards_train/2-w": -0.20437762141227722, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.448945999145508, "rewards_train/margins_1": 3.385950446128845, "rewards_train/margins_2": 2.9404949843883514, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -255.89797973632812, "logps_train/policy_1_l": -247.80728149414062, "logps_train/policy_1_w": -171.74623107910156, "logps_train/policy_2_2": -175.04428100585938, "logps_train/policy_2_w": -253.35269165039062, "logps_train/ref_1_2": -248.0, "logps_train/ref_1_l": -219.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -244.0, "rewards_train/1-2": -0.7284684181213379, "rewards_train/1-l": -2.893324851989746, "rewards_train/1-w": 4.036314010620117, "rewards_train/2-2": 4.336002349853516, "rewards_train/2-w": -0.958707869052887, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.929638862609863, "rewards_train/margins_1": 4.764782428741455, "rewards_train/margins_2": 5.294710218906403, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -190.4831085205078, "logps_train/policy_1_l": -163.9071044921875, "logps_train/policy_1_w": -125.13648986816406, "logps_train/policy_2_2": -126.73452758789062, "logps_train/policy_2_w": -202.42271423339844, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.9794150590896606, "rewards_train/1-l": -2.1527228355407715, "rewards_train/1-w": 3.5187735557556152, "rewards_train/2-2": 3.0866308212280273, "rewards_train/2-w": -1.4422721862792969, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.671496391296387, "rewards_train/margins_1": 4.498188614845276, "rewards_train/margins_2": 4.528903007507324, "step": 372 }, { "epoch": 1.11, "logps_train/policy_1_2": -156.40460205078125, "logps_train/policy_1_l": -140.921875, "logps_train/policy_1_w": -123.57737731933594, "logps_train/policy_2_2": -105.0337905883789, "logps_train/policy_2_w": -192.74264526367188, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.4721008539199829, "rewards_train/1-l": -1.838134765625, "rewards_train/1-w": 3.1529064178466797, "rewards_train/2-2": 2.85736346244812, "rewards_train/2-w": -1.3387424945831299, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.99104118347168, "rewards_train/margins_1": 3.6250072717666626, "rewards_train/margins_2": 4.19610595703125, "step": 372 }, { "epoch": 1.12, "logps_train/policy_1_2": -188.68234252929688, "logps_train/policy_1_l": -171.57400512695312, "logps_train/policy_1_w": -96.53189086914062, "logps_train/policy_2_2": -111.96082305908203, "logps_train/policy_2_w": -156.45355224609375, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.6213583946228027, "rewards_train/1-l": -2.288651466369629, "rewards_train/1-w": 2.6532068252563477, "rewards_train/2-2": 3.0921988487243652, "rewards_train/2-w": -0.9504348039627075, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.941858291625977, "rewards_train/margins_1": 4.27456521987915, "rewards_train/margins_2": 4.042633652687073, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -218.38833618164062, "logps_train/policy_1_l": -178.24618530273438, "logps_train/policy_1_w": -134.4898681640625, "logps_train/policy_2_2": -146.82574462890625, "logps_train/policy_2_w": -204.04685974121094, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.4368319511413574, "rewards_train/1-l": -1.8737881183624268, "rewards_train/1-w": 2.915465831756592, "rewards_train/2-2": 3.103752374649048, "rewards_train/2-w": -1.5345690250396729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7892539501190186, "rewards_train/margins_1": 4.352297782897949, "rewards_train/margins_2": 4.638321399688721, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -174.1063690185547, "logps_train/policy_1_l": -209.32437133789062, "logps_train/policy_1_w": -114.08921813964844, "logps_train/policy_2_2": -102.06562805175781, "logps_train/policy_2_w": -186.81297302246094, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -2.051651954650879, "rewards_train/1-l": -2.958963632583618, "rewards_train/1-w": 2.8579723834991455, "rewards_train/2-2": 2.7678515911102295, "rewards_train/2-w": -1.2896963357925415, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.816936016082764, "rewards_train/margins_1": 4.909624338150024, "rewards_train/margins_2": 4.057547926902771, "step": 373 }, { "epoch": 1.12, "logps_train/policy_1_2": -192.4703369140625, "logps_train/policy_1_l": -117.89486694335938, "logps_train/policy_1_w": -116.13349914550781, "logps_train/policy_2_2": -114.41107177734375, "logps_train/policy_2_w": -196.435791015625, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.524378776550293, "rewards_train/1-l": -1.597055196762085, "rewards_train/1-w": 3.343998432159424, "rewards_train/2-2": 3.5752992630004883, "rewards_train/2-w": -1.7864508628845215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.941053628921509, "rewards_train/margins_1": 4.868377208709717, "rewards_train/margins_2": 5.36175012588501, "step": 373 }, { "epoch": 1.12, "learning_rate": 2.2288558050064366e-06, "loss": 0.6345, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -176.2151336669922, "logps_train/policy_1_l": -140.47772216796875, "logps_train/policy_1_w": -115.48194122314453, "logps_train/policy_2_2": -121.2494125366211, "logps_train/policy_2_w": -174.08132934570312, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.0074502229690552, "rewards_train/1-l": -2.05568265914917, "rewards_train/1-w": 2.565868616104126, "rewards_train/2-2": 2.7672457695007324, "rewards_train/2-w": -1.0687766075134277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.621551275253296, "rewards_train/margins_1": 3.573318839073181, "rewards_train/margins_2": 3.83602237701416, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -143.89059448242188, "logps_train/policy_1_l": -124.44259643554688, "logps_train/policy_1_w": -83.25630187988281, "logps_train/policy_2_2": -86.80109405517578, "logps_train/policy_2_w": -145.2071533203125, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -1.1496062278747559, "rewards_train/1-l": -1.5776114463806152, "rewards_train/1-w": 2.6442923545837402, "rewards_train/2-2": 2.5048511028289795, "rewards_train/2-w": -1.406651258468628, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.2219038009643555, "rewards_train/margins_1": 3.793898582458496, "rewards_train/margins_2": 3.9115023612976074, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -215.8124542236328, "logps_train/policy_1_l": -156.86212158203125, "logps_train/policy_1_w": -114.15389251708984, "logps_train/policy_2_2": -143.50979614257812, "logps_train/policy_2_w": -181.91058349609375, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.2575151920318604, "rewards_train/1-l": -1.2145192623138428, "rewards_train/1-w": 2.9420323371887207, "rewards_train/2-2": 3.1583969593048096, "rewards_train/2-w": -1.2734808921813965, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.1565515995025635, "rewards_train/margins_1": 4.199547529220581, "rewards_train/margins_2": 4.431877851486206, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -158.86456298828125, "logps_train/policy_1_l": -158.02703857421875, "logps_train/policy_1_w": -109.78858184814453, "logps_train/policy_2_2": -108.46023559570312, "logps_train/policy_2_w": -181.6269989013672, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.8567694425582886, "rewards_train/1-l": -2.049591064453125, "rewards_train/1-w": 2.7174437046051025, "rewards_train/2-2": 2.172140121459961, "rewards_train/2-w": -1.1562542915344238, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.7670347690582275, "rewards_train/margins_1": 3.574213147163391, "rewards_train/margins_2": 3.3283944129943848, "step": 374 }, { "epoch": 1.12, "logps_train/policy_1_2": -215.46343994140625, "logps_train/policy_1_l": -195.21095275878906, "logps_train/policy_1_w": -158.59579467773438, "logps_train/policy_2_2": -136.7734375, "logps_train/policy_2_w": -245.4224395751953, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.659430980682373, "rewards_train/1-l": -2.1010849475860596, "rewards_train/1-w": 3.668546199798584, "rewards_train/2-2": 3.387598752975464, "rewards_train/2-w": -1.4188071489334106, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.7696311473846436, "rewards_train/margins_1": 5.327977180480957, "rewards_train/margins_2": 4.8064059019088745, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -154.26205444335938, "logps_train/policy_1_l": -180.21531677246094, "logps_train/policy_1_w": -103.72434997558594, "logps_train/policy_2_2": -97.63893127441406, "logps_train/policy_2_w": -179.00827026367188, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.3168306350708008, "rewards_train/1-l": -2.941844940185547, "rewards_train/1-w": 2.4635024070739746, "rewards_train/2-2": 2.1226303577423096, "rewards_train/2-w": -1.8957481384277344, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.4053473472595215, "rewards_train/margins_1": 3.7803330421447754, "rewards_train/margins_2": 4.018378496170044, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -224.73779296875, "logps_train/policy_1_l": -209.72146606445312, "logps_train/policy_1_w": -133.8000946044922, "logps_train/policy_2_2": -161.77993774414062, "logps_train/policy_2_w": -201.1142578125, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.4905760884284973, "rewards_train/1-l": -2.370584726333618, "rewards_train/1-w": 3.3532915115356445, "rewards_train/2-2": 3.5936851501464844, "rewards_train/2-w": -0.9518553018569946, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.723876237869263, "rewards_train/margins_1": 3.843867599964142, "rewards_train/margins_2": 4.545540452003479, "step": 375 }, { "epoch": 1.12, "logps_train/policy_1_2": -201.6160430908203, "logps_train/policy_1_l": -203.37374877929688, "logps_train/policy_1_w": -119.93785095214844, "logps_train/policy_2_2": -135.0927734375, "logps_train/policy_2_w": -184.78631591796875, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.0659018754959106, "rewards_train/1-l": -2.548898935317993, "rewards_train/1-w": 2.929652214050293, "rewards_train/2-2": 3.3405261039733887, "rewards_train/2-w": -1.2497243881225586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.478551149368286, "rewards_train/margins_1": 3.9955540895462036, "rewards_train/margins_2": 4.590250492095947, "step": 375 }, { "epoch": 1.13, "learning_rate": 2.204317072696586e-06, "loss": 0.6619, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -207.78466796875, "logps_train/policy_1_l": -149.96380615234375, "logps_train/policy_1_w": -122.65391540527344, "logps_train/policy_2_2": -139.89744567871094, "logps_train/policy_2_w": -182.9178466796875, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.032666802406311, "rewards_train/1-l": -1.7755054235458374, "rewards_train/1-w": 3.161757230758667, "rewards_train/2-2": 3.1203320026397705, "rewards_train/2-w": -0.6421760320663452, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.937262654304504, "rewards_train/margins_1": 4.194424033164978, "rewards_train/margins_2": 3.7625080347061157, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -171.4884033203125, "logps_train/policy_1_l": -178.4679412841797, "logps_train/policy_1_w": -120.59967803955078, "logps_train/policy_2_2": -114.81684112548828, "logps_train/policy_2_w": -194.92884826660156, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.869543194770813, "rewards_train/1-l": -2.660252332687378, "rewards_train/1-w": 3.2758231163024902, "rewards_train/2-2": 3.093315839767456, "rewards_train/2-w": -1.2136856317520142, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.936075448989868, "rewards_train/margins_1": 4.145366311073303, "rewards_train/margins_2": 4.30700147151947, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -190.96258544921875, "logps_train/policy_1_l": -259.56805419921875, "logps_train/policy_1_w": -142.84588623046875, "logps_train/policy_2_2": -128.2604217529297, "logps_train/policy_2_w": -230.26803588867188, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.1103203296661377, "rewards_train/1-l": -2.948211908340454, "rewards_train/1-w": 3.1167783737182617, "rewards_train/2-2": 2.7550125122070312, "rewards_train/2-w": -1.6088355779647827, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.064990282058716, "rewards_train/margins_1": 4.227098703384399, "rewards_train/margins_2": 4.363848090171814, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -182.4136505126953, "logps_train/policy_1_l": -159.9759521484375, "logps_train/policy_1_w": -100.54615783691406, "logps_train/policy_2_2": -121.51094055175781, "logps_train/policy_2_w": -171.38668823242188, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.1245684623718262, "rewards_train/1-l": -2.0986199378967285, "rewards_train/1-w": 3.0103745460510254, "rewards_train/2-2": 2.719804286956787, "rewards_train/2-w": -1.0218720436096191, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.108994483947754, "rewards_train/margins_1": 4.134943008422852, "rewards_train/margins_2": 3.7416763305664062, "step": 376 }, { "epoch": 1.13, "logps_train/policy_1_2": -208.10101318359375, "logps_train/policy_1_l": -218.78213500976562, "logps_train/policy_1_w": -132.86119079589844, "logps_train/policy_2_2": -140.571533203125, "logps_train/policy_2_w": -205.17510986328125, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.2880322933197021, "rewards_train/1-l": -2.903116226196289, "rewards_train/1-w": 2.9101693630218506, "rewards_train/2-2": 3.1324949264526367, "rewards_train/2-w": -1.7911432981491089, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.81328558921814, "rewards_train/margins_1": 4.198201656341553, "rewards_train/margins_2": 4.923638224601746, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -163.0740966796875, "logps_train/policy_1_l": -200.63824462890625, "logps_train/policy_1_w": -123.60157012939453, "logps_train/policy_2_2": -98.19126892089844, "logps_train/policy_2_w": -210.04745483398438, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.5011584758758545, "rewards_train/1-l": -2.7640678882598877, "rewards_train/1-w": 2.912108898162842, "rewards_train/2-2": 2.584974765777588, "rewards_train/2-w": -2.1906819343566895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.6761767864227295, "rewards_train/margins_1": 4.413267374038696, "rewards_train/margins_2": 4.775656700134277, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -228.72906494140625, "logps_train/policy_1_l": -209.50302124023438, "logps_train/policy_1_w": -187.29603576660156, "logps_train/policy_2_2": -151.14813232421875, "logps_train/policy_2_w": -289.69122314453125, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -227.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -264.0, "rewards_train/1-2": -1.6088447570800781, "rewards_train/1-l": -2.133650779724121, "rewards_train/1-w": 3.9598488807678223, "rewards_train/2-2": 3.324248790740967, "rewards_train/2-w": -2.590999126434326, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.093499660491943, "rewards_train/margins_1": 5.5686936378479, "rewards_train/margins_2": 5.915247917175293, "step": 377 }, { "epoch": 1.13, "logps_train/policy_1_2": -151.3409423828125, "logps_train/policy_1_l": -146.29727172851562, "logps_train/policy_1_w": -94.03047180175781, "logps_train/policy_2_2": -99.52900695800781, "logps_train/policy_2_w": -150.341796875, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -0.4861447215080261, "rewards_train/1-l": -1.9973911046981812, "rewards_train/1-w": 2.751249074935913, "rewards_train/2-2": 2.9451465606689453, "rewards_train/2-w": -0.7568360567092896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.748640179634094, "rewards_train/margins_1": 3.237393796443939, "rewards_train/margins_2": 3.701982617378235, "step": 377 }, { "epoch": 1.13, "learning_rate": 2.1798071985191833e-06, "loss": 0.5365, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -200.12661743164062, "logps_train/policy_1_l": -133.522705078125, "logps_train/policy_1_w": -116.84788513183594, "logps_train/policy_2_2": -133.8075408935547, "logps_train/policy_2_w": -176.6617889404297, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -0.9304341077804565, "rewards_train/1-l": -1.804222583770752, "rewards_train/1-w": 2.447962760925293, "rewards_train/2-2": 3.3530352115631104, "rewards_train/2-w": -0.97809237241745, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.252185344696045, "rewards_train/margins_1": 3.3783968687057495, "rewards_train/margins_2": 4.33112758398056, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -200.18453979492188, "logps_train/policy_1_l": -165.23326110839844, "logps_train/policy_1_w": -96.47821044921875, "logps_train/policy_2_2": -122.97271728515625, "logps_train/policy_2_w": -166.52589416503906, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -2.1827099323272705, "rewards_train/1-l": -2.295884132385254, "rewards_train/1-w": 2.691437244415283, "rewards_train/2-2": 2.6972594261169434, "rewards_train/2-w": -1.4264180660247803, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.987321376800537, "rewards_train/margins_1": 4.874147176742554, "rewards_train/margins_2": 4.123677492141724, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -194.46119689941406, "logps_train/policy_1_l": -197.09375, "logps_train/policy_1_w": -91.69064331054688, "logps_train/policy_2_2": -146.88943481445312, "logps_train/policy_2_w": -128.76034545898438, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -0.8219009637832642, "rewards_train/1-l": -2.8122081756591797, "rewards_train/1-w": 2.841776132583618, "rewards_train/2-2": 2.518869161605835, "rewards_train/2-w": 0.42474597692489624, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.653984308242798, "rewards_train/margins_1": 3.6636770963668823, "rewards_train/margins_2": 2.0941231846809387, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -92.33089447021484, "logps_train/policy_1_l": -77.8370361328125, "logps_train/policy_1_w": -40.35573959350586, "logps_train/policy_2_2": -62.095603942871094, "logps_train/policy_2_w": -72.79965209960938, "logps_train/ref_1_2": -86.0, "logps_train/ref_1_l": -64.0, "logps_train/ref_1_w": -56.0, "logps_train/ref_2_2": -76.0, "logps_train/ref_2_w": -67.0, "rewards_train/1-2": -0.6190263032913208, "rewards_train/1-l": -1.3780395984649658, "rewards_train/1-w": 1.575363278388977, "rewards_train/2-2": 1.3984471559524536, "rewards_train/2-w": -0.5866066217422485, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 2.953402876853943, "rewards_train/margins_1": 2.194389581680298, "rewards_train/margins_2": 1.9850537776947021, "step": 378 }, { "epoch": 1.13, "logps_train/policy_1_2": -210.20628356933594, "logps_train/policy_1_l": -218.0662841796875, "logps_train/policy_1_w": -136.2288818359375, "logps_train/policy_2_2": -142.94802856445312, "logps_train/policy_2_w": -213.46299743652344, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -2.0889885425567627, "rewards_train/1-l": -3.2061643600463867, "rewards_train/1-w": 3.4373409748077393, "rewards_train/2-2": 2.584531307220459, "rewards_train/2-w": -1.8099713325500488, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.643505334854126, "rewards_train/margins_1": 5.526329517364502, "rewards_train/margins_2": 4.394502639770508, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -153.29458618164062, "logps_train/policy_1_l": -193.046630859375, "logps_train/policy_1_w": -111.43508911132812, "logps_train/policy_2_2": -107.2300796508789, "logps_train/policy_2_w": -168.80718994140625, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.015395861119031906, "rewards_train/1-l": -2.6794190406799316, "rewards_train/1-w": 2.697507381439209, "rewards_train/2-2": 2.714296817779541, "rewards_train/2-w": -0.739997148513794, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.376926422119141, "rewards_train/margins_1": 2.712903242558241, "rewards_train/margins_2": 3.454293966293335, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -179.56063842773438, "logps_train/policy_1_l": -182.82852172851562, "logps_train/policy_1_w": -127.47128295898438, "logps_train/policy_2_2": -128.99130249023438, "logps_train/policy_2_w": -179.87460327148438, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.18926653265953064, "rewards_train/1-l": -2.0851964950561523, "rewards_train/1-w": 3.2655673027038574, "rewards_train/2-2": 3.0500893592834473, "rewards_train/2-w": 0.35609352588653564, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.35076379776001, "rewards_train/margins_1": 3.454833835363388, "rewards_train/margins_2": 2.6939958333969116, "step": 379 }, { "epoch": 1.13, "logps_train/policy_1_2": -171.364501953125, "logps_train/policy_1_l": -203.51805114746094, "logps_train/policy_1_w": -131.10794067382812, "logps_train/policy_2_2": -107.34362030029297, "logps_train/policy_2_w": -200.32162475585938, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.9628171324729919, "rewards_train/1-l": -3.237523078918457, "rewards_train/1-w": 3.093502998352051, "rewards_train/2-2": 2.8343148231506348, "rewards_train/2-w": -1.060286045074463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.331026077270508, "rewards_train/margins_1": 4.056320130825043, "rewards_train/margins_2": 3.8946008682250977, "step": 379 }, { "epoch": 1.14, "learning_rate": 2.1553285745947396e-06, "loss": 0.716, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -153.3419952392578, "logps_train/policy_1_l": -166.77896118164062, "logps_train/policy_1_w": -95.6395034790039, "logps_train/policy_2_2": -91.57505798339844, "logps_train/policy_2_w": -168.88014221191406, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.1232616901397705, "rewards_train/1-l": -2.235049247741699, "rewards_train/1-w": 2.9508934020996094, "rewards_train/2-2": 2.6241343021392822, "rewards_train/2-w": -1.2512959241867065, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.185942649841309, "rewards_train/margins_1": 4.07415509223938, "rewards_train/margins_2": 3.8754302263259888, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -292.638427734375, "logps_train/policy_1_l": -181.668212890625, "logps_train/policy_1_w": -126.16664123535156, "logps_train/policy_2_2": -175.7814483642578, "logps_train/policy_2_w": -209.3060760498047, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -215.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -3.477903127670288, "rewards_train/1-l": -2.2174057960510254, "rewards_train/1-w": 3.368199348449707, "rewards_train/2-2": 3.9431443214416504, "rewards_train/2-w": -1.5186930894851685, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.585605144500732, "rewards_train/margins_1": 6.846102476119995, "rewards_train/margins_2": 5.461837410926819, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -123.5116195678711, "logps_train/policy_1_l": -84.97406005859375, "logps_train/policy_1_w": -102.98267364501953, "logps_train/policy_2_2": -77.93342590332031, "logps_train/policy_2_w": -152.97781372070312, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -0.7378809452056885, "rewards_train/1-l": -1.4013727903366089, "rewards_train/1-w": 2.3359241485595703, "rewards_train/2-2": 2.321110248565674, "rewards_train/2-w": -1.0426298379898071, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.737296938896179, "rewards_train/margins_1": 3.073805093765259, "rewards_train/margins_2": 3.363740086555481, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -160.7281494140625, "logps_train/policy_1_l": -217.0853729248047, "logps_train/policy_1_w": -127.63716125488281, "logps_train/policy_2_2": -104.82498168945312, "logps_train/policy_2_w": -199.07241821289062, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.9806276559829712, "rewards_train/1-l": -2.6919355392456055, "rewards_train/1-w": 2.928471088409424, "rewards_train/2-2": 2.5585174560546875, "rewards_train/2-w": -1.3447421789169312, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.620406627655029, "rewards_train/margins_1": 3.909098744392395, "rewards_train/margins_2": 3.9032596349716187, "step": 380 }, { "epoch": 1.14, "logps_train/policy_1_2": -179.63320922851562, "logps_train/policy_1_l": -114.7061538696289, "logps_train/policy_1_w": -79.95648193359375, "logps_train/policy_2_2": -115.42897033691406, "logps_train/policy_2_w": -146.08828735351562, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -2.258046865463257, "rewards_train/1-l": -1.7742650508880615, "rewards_train/1-w": 2.168609619140625, "rewards_train/2-2": 2.129758834838867, "rewards_train/2-w": -1.7256255149841309, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9428746700286865, "rewards_train/margins_1": 4.426656484603882, "rewards_train/margins_2": 3.855384349822998, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -129.79257202148438, "logps_train/policy_1_l": -157.30047607421875, "logps_train/policy_1_w": -69.05963897705078, "logps_train/policy_2_2": -82.71607971191406, "logps_train/policy_2_w": -103.85704803466797, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -101.5, "rewards_train/1-2": -0.8198815584182739, "rewards_train/1-l": -2.3858084678649902, "rewards_train/1-w": 1.9629813432693481, "rewards_train/2-2": 2.4092509746551514, "rewards_train/2-w": -0.23414240777492523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.348789811134338, "rewards_train/margins_1": 2.782862901687622, "rewards_train/margins_2": 2.6433933824300766, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -169.93948364257812, "logps_train/policy_1_l": -198.1584930419922, "logps_train/policy_1_w": -161.8802490234375, "logps_train/policy_2_2": -126.31794738769531, "logps_train/policy_2_w": -232.75601196289062, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.014260947704315186, "rewards_train/1-l": -2.578544855117798, "rewards_train/1-w": 3.574084758758545, "rewards_train/2-2": 2.828362464904785, "rewards_train/2-w": -1.1435686349868774, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.152629613876343, "rewards_train/margins_1": 3.58834570646286, "rewards_train/margins_2": 3.9719310998916626, "step": 381 }, { "epoch": 1.14, "logps_train/policy_1_2": -195.87542724609375, "logps_train/policy_1_l": -181.50347900390625, "logps_train/policy_1_w": -140.14454650878906, "logps_train/policy_2_2": -137.23001098632812, "logps_train/policy_2_w": -210.38632202148438, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -0.033636510372161865, "rewards_train/1-l": -2.3917531967163086, "rewards_train/1-w": 3.57997989654541, "rewards_train/2-2": 3.8328583240509033, "rewards_train/2-w": -1.0652923583984375, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.971733093261719, "rewards_train/margins_1": 3.613616406917572, "rewards_train/margins_2": 4.898150682449341, "step": 381 }, { "epoch": 1.14, "learning_rate": 2.1308835899937974e-06, "loss": 0.6982, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -167.55020141601562, "logps_train/policy_1_l": -156.0297393798828, "logps_train/policy_1_w": -104.44473266601562, "logps_train/policy_2_2": -105.06230163574219, "logps_train/policy_2_w": -161.47703552246094, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -1.8102939128875732, "rewards_train/1-l": -2.2114453315734863, "rewards_train/1-w": 2.0697853565216064, "rewards_train/2-2": 2.3986520767211914, "rewards_train/2-w": -1.931687831878662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.281230688095093, "rewards_train/margins_1": 3.8800792694091797, "rewards_train/margins_2": 4.3303399085998535, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -123.1106948852539, "logps_train/policy_1_l": -79.49546813964844, "logps_train/policy_1_w": -85.77676391601562, "logps_train/policy_2_2": -74.01669311523438, "logps_train/policy_2_w": -131.62106323242188, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -67.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": -1.0931007862091064, "rewards_train/1-l": -1.177403211593628, "rewards_train/1-w": 2.0394136905670166, "rewards_train/2-2": 2.194033622741699, "rewards_train/2-w": -0.757029116153717, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.2168169021606445, "rewards_train/margins_1": 3.132514476776123, "rewards_train/margins_2": 2.9510627388954163, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -219.03335571289062, "logps_train/policy_1_l": -207.47630310058594, "logps_train/policy_1_w": -119.28524780273438, "logps_train/policy_2_2": -149.58714294433594, "logps_train/policy_2_w": -179.31748962402344, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.2790188789367676, "rewards_train/1-l": -3.051145553588867, "rewards_train/1-w": 2.8377838134765625, "rewards_train/2-2": 3.4156503677368164, "rewards_train/2-w": -0.9995722770690918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.88892936706543, "rewards_train/margins_1": 4.11680269241333, "rewards_train/margins_2": 4.415222644805908, "step": 382 }, { "epoch": 1.14, "logps_train/policy_1_2": -154.68472290039062, "logps_train/policy_1_l": -147.74365234375, "logps_train/policy_1_w": -89.58573913574219, "logps_train/policy_2_2": -104.73358154296875, "logps_train/policy_2_w": -140.98570251464844, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.9715967178344727, "rewards_train/1-l": -2.412254810333252, "rewards_train/1-w": 2.7402548789978027, "rewards_train/2-2": 2.366875648498535, "rewards_train/2-w": -0.7493516206741333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.152509689331055, "rewards_train/margins_1": 3.7118515968322754, "rewards_train/margins_2": 3.1162272691726685, "step": 382 }, { "epoch": 1.15, "logps_train/policy_1_2": -176.07553100585938, "logps_train/policy_1_l": -141.97589111328125, "logps_train/policy_1_w": -87.96678161621094, "logps_train/policy_2_2": -114.24398803710938, "logps_train/policy_2_w": -138.17613220214844, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -1.6567717790603638, "rewards_train/1-l": -2.169074058532715, "rewards_train/1-w": 2.255275249481201, "rewards_train/2-2": 2.1443512439727783, "rewards_train/2-w": -1.0969102382659912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.424349308013916, "rewards_train/margins_1": 3.912047028541565, "rewards_train/margins_2": 3.2412614822387695, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -177.3915557861328, "logps_train/policy_1_l": -180.66598510742188, "logps_train/policy_1_w": -111.4213638305664, "logps_train/policy_2_2": -129.16282653808594, "logps_train/policy_2_w": -167.904052734375, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.7118111252784729, "rewards_train/1-l": -2.4155490398406982, "rewards_train/1-w": 2.8699729442596436, "rewards_train/2-2": 2.3122317790985107, "rewards_train/2-w": -0.8345451951026917, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.285521984100342, "rewards_train/margins_1": 3.5817840695381165, "rewards_train/margins_2": 3.1467769742012024, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -115.80540466308594, "logps_train/policy_1_l": -123.21434020996094, "logps_train/policy_1_w": -74.17213439941406, "logps_train/policy_2_2": -80.6212158203125, "logps_train/policy_2_w": -120.02313232421875, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": -0.3630596697330475, "rewards_train/1-l": -1.6824204921722412, "rewards_train/1-w": 2.376340866088867, "rewards_train/2-2": 2.0995969772338867, "rewards_train/2-w": -0.7446955442428589, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.058761358261108, "rewards_train/margins_1": 2.7394005358219147, "rewards_train/margins_2": 2.8442925214767456, "step": 383 }, { "epoch": 1.15, "logps_train/policy_1_2": -239.58163452148438, "logps_train/policy_1_l": -210.97149658203125, "logps_train/policy_1_w": -138.3822479248047, "logps_train/policy_2_2": -163.01025390625, "logps_train/policy_2_w": -222.8698272705078, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -1.8980059623718262, "rewards_train/1-l": -2.0928521156311035, "rewards_train/1-w": 3.3049397468566895, "rewards_train/2-2": 3.2349119186401367, "rewards_train/2-w": -1.5287799835205078, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.397791862487793, "rewards_train/margins_1": 5.202945709228516, "rewards_train/margins_2": 4.7636919021606445, "step": 383 }, { "epoch": 1.15, "learning_rate": 2.1064746305037595e-06, "loss": 0.7844, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -278.2189025878906, "logps_train/policy_1_l": -171.77499389648438, "logps_train/policy_1_w": -135.00538635253906, "logps_train/policy_2_2": -192.24905395507812, "logps_train/policy_2_w": -209.65768432617188, "logps_train/ref_1_2": -255.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -2.297672748565674, "rewards_train/1-l": -2.33190655708313, "rewards_train/1-w": 3.4398303031921387, "rewards_train/2-2": 3.4598608016967773, "rewards_train/2-w": -1.1934285163879395, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.7717368602752686, "rewards_train/margins_1": 5.7375030517578125, "rewards_train/margins_2": 4.653289318084717, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -165.1116485595703, "logps_train/policy_1_l": -144.47964477539062, "logps_train/policy_1_w": -83.33472442626953, "logps_train/policy_2_2": -109.86091613769531, "logps_train/policy_2_w": -140.234130859375, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -1.5791332721710205, "rewards_train/1-l": -2.0141756534576416, "rewards_train/1-w": 2.5290277004241943, "rewards_train/2-2": 2.1521897315979004, "rewards_train/2-w": -1.0851311683654785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.543203353881836, "rewards_train/margins_1": 4.108160972595215, "rewards_train/margins_2": 3.237320899963379, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -128.07012939453125, "logps_train/policy_1_l": -108.97561645507812, "logps_train/policy_1_w": -91.57488250732422, "logps_train/policy_2_2": -78.49187469482422, "logps_train/policy_2_w": -138.32577514648438, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -91.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -1.2601381540298462, "rewards_train/1-l": -1.8140535354614258, "rewards_train/1-w": 2.6194653511047363, "rewards_train/2-2": 2.001594066619873, "rewards_train/2-w": -0.7247654795646667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.433518886566162, "rewards_train/margins_1": 3.8796035051345825, "rewards_train/margins_2": 2.72635954618454, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -206.7357940673828, "logps_train/policy_1_l": -212.87860107421875, "logps_train/policy_1_w": -152.89288330078125, "logps_train/policy_2_2": -141.55609130859375, "logps_train/policy_2_w": -238.65194702148438, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.691547155380249, "rewards_train/1-l": -2.5798397064208984, "rewards_train/1-w": 3.8853342533111572, "rewards_train/2-2": 2.704155921936035, "rewards_train/2-w": -1.3177340030670166, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.465173959732056, "rewards_train/margins_1": 5.576881408691406, "rewards_train/margins_2": 4.021889925003052, "step": 384 }, { "epoch": 1.15, "logps_train/policy_1_2": -219.902587890625, "logps_train/policy_1_l": -219.34783935546875, "logps_train/policy_1_w": -160.29507446289062, "logps_train/policy_2_2": -156.228271484375, "logps_train/policy_2_w": -229.8818359375, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -0.5621351003646851, "rewards_train/1-l": -2.8986504077911377, "rewards_train/1-w": 4.200765609741211, "rewards_train/2-2": 3.4709224700927734, "rewards_train/2-w": -0.08662177622318268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.099416017532349, "rewards_train/margins_1": 4.762900710105896, "rewards_train/margins_2": 3.557544246315956, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -190.4246826171875, "logps_train/policy_1_l": -152.30615234375, "logps_train/policy_1_w": -114.88249206542969, "logps_train/policy_2_2": -122.78843688964844, "logps_train/policy_2_w": -182.26040649414062, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.5991096496582031, "rewards_train/1-l": -2.4256350994110107, "rewards_train/1-w": 2.917365550994873, "rewards_train/2-2": 3.2801406383514404, "rewards_train/2-w": -1.142350435256958, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.343000650405884, "rewards_train/margins_1": 4.516475200653076, "rewards_train/margins_2": 4.422491073608398, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -189.4923095703125, "logps_train/policy_1_l": -225.8267822265625, "logps_train/policy_1_w": -151.08029174804688, "logps_train/policy_2_2": -118.20404052734375, "logps_train/policy_2_w": -239.2527618408203, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -1.327355146408081, "rewards_train/1-l": -2.92252254486084, "rewards_train/1-w": 3.5732216835021973, "rewards_train/2-2": 2.992095947265625, "rewards_train/2-w": -1.826838493347168, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.495744228363037, "rewards_train/margins_1": 4.900576829910278, "rewards_train/margins_2": 4.818934440612793, "step": 385 }, { "epoch": 1.15, "logps_train/policy_1_2": -103.98397827148438, "logps_train/policy_1_l": -107.19005584716797, "logps_train/policy_1_w": -104.60966491699219, "logps_train/policy_2_2": -61.984256744384766, "logps_train/policy_2_w": -149.11866760253906, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -81.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -0.8807225227355957, "rewards_train/1-l": -1.2849358320236206, "rewards_train/1-w": 2.1845412254333496, "rewards_train/2-2": 1.8743282556533813, "rewards_train/2-w": -0.8657733201980591, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.46947705745697, "rewards_train/margins_1": 3.0652637481689453, "rewards_train/margins_2": 2.7401015758514404, "step": 385 }, { "epoch": 1.16, "learning_rate": 2.0821040783960422e-06, "loss": 0.6591, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -185.08975219726562, "logps_train/policy_1_l": -185.19090270996094, "logps_train/policy_1_w": -98.70816040039062, "logps_train/policy_2_2": -116.92737579345703, "logps_train/policy_2_w": -155.70896911621094, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -1.4374912977218628, "rewards_train/1-l": -2.928464412689209, "rewards_train/1-w": 2.8920741081237793, "rewards_train/2-2": 3.394371747970581, "rewards_train/2-w": -0.6927715539932251, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.820538520812988, "rewards_train/margins_1": 4.329565405845642, "rewards_train/margins_2": 4.087143301963806, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -223.50588989257812, "logps_train/policy_1_l": -255.5313262939453, "logps_train/policy_1_w": -136.38009643554688, "logps_train/policy_2_2": -155.59844970703125, "logps_train/policy_2_w": -203.83218383789062, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.8352568745613098, "rewards_train/1-l": -3.2496166229248047, "rewards_train/1-w": 3.0702919960021973, "rewards_train/2-2": 3.7477731704711914, "rewards_train/2-w": -1.3412247896194458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.319908618927002, "rewards_train/margins_1": 3.905548870563507, "rewards_train/margins_2": 5.088997960090637, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -244.13528442382812, "logps_train/policy_1_l": -220.3174591064453, "logps_train/policy_1_w": -127.1434326171875, "logps_train/policy_2_2": -160.31358337402344, "logps_train/policy_2_w": -211.32652282714844, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.1979033946990967, "rewards_train/1-l": -2.560065746307373, "rewards_train/1-w": 2.999134063720703, "rewards_train/2-2": 3.9245004653930664, "rewards_train/2-w": -1.8699567317962646, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.559199810028076, "rewards_train/margins_1": 4.1970374584198, "rewards_train/margins_2": 5.794457197189331, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -188.74525451660156, "logps_train/policy_1_l": -119.35047912597656, "logps_train/policy_1_w": -131.14443969726562, "logps_train/policy_2_2": -119.00619506835938, "logps_train/policy_2_w": -191.2864227294922, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -1.428431749343872, "rewards_train/1-l": -2.012831211090088, "rewards_train/1-w": 3.10371994972229, "rewards_train/2-2": 3.286099672317505, "rewards_train/2-w": -0.8382133841514587, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.116551160812378, "rewards_train/margins_1": 4.532151699066162, "rewards_train/margins_2": 4.124313056468964, "step": 386 }, { "epoch": 1.16, "logps_train/policy_1_2": -251.2752685546875, "logps_train/policy_1_l": -235.35690307617188, "logps_train/policy_1_w": -178.33969116210938, "logps_train/policy_2_2": -172.478759765625, "logps_train/policy_2_w": -278.73480224609375, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -213.0, "logps_train/ref_2_w": -253.0, "rewards_train/1-2": -1.1572141647338867, "rewards_train/1-l": -2.6134729385375977, "rewards_train/1-w": 3.580873489379883, "rewards_train/2-2": 4.048999309539795, "rewards_train/2-w": -2.52601957321167, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.1943464279174805, "rewards_train/margins_1": 4.7380876541137695, "rewards_train/margins_2": 6.575018882751465, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -175.78616333007812, "logps_train/policy_1_l": -192.2008056640625, "logps_train/policy_1_w": -122.62382507324219, "logps_train/policy_2_2": -116.89816284179688, "logps_train/policy_2_w": -191.04592895507812, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.911037802696228, "rewards_train/1-l": -2.710510730743408, "rewards_train/1-w": 3.1738481521606445, "rewards_train/2-2": 2.869753837585449, "rewards_train/2-w": -1.5485868453979492, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.884358882904053, "rewards_train/margins_1": 4.084885954856873, "rewards_train/margins_2": 4.418340682983398, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -153.05419921875, "logps_train/policy_1_l": -172.6392364501953, "logps_train/policy_1_w": -121.11109161376953, "logps_train/policy_2_2": -95.1317138671875, "logps_train/policy_2_w": -186.90304565429688, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.1405748128890991, "rewards_train/1-l": -2.6268138885498047, "rewards_train/1-w": 3.116234302520752, "rewards_train/2-2": 2.6200318336486816, "rewards_train/2-w": -1.361006498336792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.743048191070557, "rewards_train/margins_1": 4.256809115409851, "rewards_train/margins_2": 3.9810383319854736, "step": 387 }, { "epoch": 1.16, "logps_train/policy_1_2": -123.84685516357422, "logps_train/policy_1_l": -83.09390258789062, "logps_train/policy_1_w": -77.1074447631836, "logps_train/policy_2_2": -74.45460510253906, "logps_train/policy_2_w": -121.397216796875, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": -1.461248517036438, "rewards_train/1-l": -1.3169584274291992, "rewards_train/1-w": 1.9697242975234985, "rewards_train/2-2": 1.8223135471343994, "rewards_train/2-w": -0.8998775482177734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.2866827249526978, "rewards_train/margins_1": 3.4309728145599365, "rewards_train/margins_2": 2.722191095352173, "step": 387 }, { "epoch": 1.16, "learning_rate": 2.057774312193568e-06, "loss": 0.6627, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -218.39866638183594, "logps_train/policy_1_l": -221.022705078125, "logps_train/policy_1_w": -129.98580932617188, "logps_train/policy_2_2": -143.1622314453125, "logps_train/policy_2_w": -195.6918487548828, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.4883041381835938, "rewards_train/1-l": -3.118530511856079, "rewards_train/1-w": 3.365480422973633, "rewards_train/2-2": 3.5767455101013184, "rewards_train/2-w": -0.9988713264465332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.484010934829712, "rewards_train/margins_1": 4.853784561157227, "rewards_train/margins_2": 4.575616836547852, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -162.97535705566406, "logps_train/policy_1_l": -145.28591918945312, "logps_train/policy_1_w": -98.80203247070312, "logps_train/policy_2_2": -101.50050354003906, "logps_train/policy_2_w": -158.81109619140625, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.6007832288742065, "rewards_train/1-l": -2.1250762939453125, "rewards_train/1-w": 2.608370780944824, "rewards_train/2-2": 2.6634256839752197, "rewards_train/2-w": -1.0936087369918823, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.733447074890137, "rewards_train/margins_1": 4.209154009819031, "rewards_train/margins_2": 3.757034420967102, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -148.496826171875, "logps_train/policy_1_l": -138.54306030273438, "logps_train/policy_1_w": -75.39277648925781, "logps_train/policy_2_2": -94.70649719238281, "logps_train/policy_2_w": -123.50643920898438, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": -1.7574942111968994, "rewards_train/1-l": -3.0651464462280273, "rewards_train/1-w": 2.868535041809082, "rewards_train/2-2": 1.9484913349151611, "rewards_train/2-w": -0.2531828284263611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.933681488037109, "rewards_train/margins_1": 4.6260292530059814, "rewards_train/margins_2": 2.201674163341522, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -165.67861938476562, "logps_train/policy_1_l": -169.5701446533203, "logps_train/policy_1_w": -107.04953002929688, "logps_train/policy_2_2": -110.5108642578125, "logps_train/policy_2_w": -169.9055938720703, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.2756736278533936, "rewards_train/1-l": -2.469318389892578, "rewards_train/1-w": 2.7987582683563232, "rewards_train/2-2": 2.563366413116455, "rewards_train/2-w": -1.0569654703140259, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.268076658248901, "rewards_train/margins_1": 4.074431896209717, "rewards_train/margins_2": 3.620331883430481, "step": 388 }, { "epoch": 1.16, "logps_train/policy_1_2": -189.08547973632812, "logps_train/policy_1_l": -196.03436279296875, "logps_train/policy_1_w": -133.3074951171875, "logps_train/policy_2_2": -113.75926971435547, "logps_train/policy_2_w": -217.0779266357422, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.8491740226745605, "rewards_train/1-l": -2.0466017723083496, "rewards_train/1-w": 3.0797970294952393, "rewards_train/2-2": 2.90219783782959, "rewards_train/2-w": -2.1345486640930176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.126398801803589, "rewards_train/margins_1": 4.9289710521698, "rewards_train/margins_2": 5.036746501922607, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -180.0889129638672, "logps_train/policy_1_l": -162.22195434570312, "logps_train/policy_1_w": -132.553466796875, "logps_train/policy_2_2": -108.05252075195312, "logps_train/policy_2_w": -211.38812255859375, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.7754933834075928, "rewards_train/1-l": -1.7899694442749023, "rewards_train/1-w": 3.152076244354248, "rewards_train/2-2": 2.821115016937256, "rewards_train/2-w": -2.0325613021850586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.94204568862915, "rewards_train/margins_1": 4.927569627761841, "rewards_train/margins_2": 4.8536763191223145, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -236.95689392089844, "logps_train/policy_1_l": -190.88705444335938, "logps_train/policy_1_w": -141.76934814453125, "logps_train/policy_2_2": -154.32872009277344, "logps_train/policy_2_w": -210.278076171875, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.6851413249969482, "rewards_train/1-l": -2.471713066101074, "rewards_train/1-w": 3.01896333694458, "rewards_train/2-2": 3.4643940925598145, "rewards_train/2-w": -1.426635503768921, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.490676403045654, "rewards_train/margins_1": 4.704104661941528, "rewards_train/margins_2": 4.891029596328735, "step": 389 }, { "epoch": 1.16, "logps_train/policy_1_2": -188.85528564453125, "logps_train/policy_1_l": -148.28245544433594, "logps_train/policy_1_w": -121.61856842041016, "logps_train/policy_2_2": -122.25845336914062, "logps_train/policy_2_w": -189.73269653320312, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.9777166843414307, "rewards_train/1-l": -1.7932852506637573, "rewards_train/1-w": 2.76119065284729, "rewards_train/2-2": 2.580794334411621, "rewards_train/2-w": -1.6310817003250122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.554475903511047, "rewards_train/margins_1": 4.738907337188721, "rewards_train/margins_2": 4.211876034736633, "step": 389 }, { "epoch": 1.17, "learning_rate": 2.0334877064386277e-06, "loss": 0.7032, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -152.34725952148438, "logps_train/policy_1_l": -166.70118713378906, "logps_train/policy_1_w": -144.90261840820312, "logps_train/policy_2_2": -104.50428771972656, "logps_train/policy_2_w": -207.3089599609375, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -0.6466889381408691, "rewards_train/1-l": -1.7700705528259277, "rewards_train/1-w": 2.9739954471588135, "rewards_train/2-2": 2.526768684387207, "rewards_train/2-w": -1.2512085437774658, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.744065999984741, "rewards_train/margins_1": 3.6206843852996826, "rewards_train/margins_2": 3.777977228164673, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -165.40283203125, "logps_train/policy_1_l": -196.88548278808594, "logps_train/policy_1_w": -155.0042724609375, "logps_train/policy_2_2": -112.76983642578125, "logps_train/policy_2_w": -215.06954956054688, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -0.4098145663738251, "rewards_train/1-l": -2.2243871688842773, "rewards_train/1-w": 3.189147472381592, "rewards_train/2-2": 3.0753602981567383, "rewards_train/2-w": -0.7218961119651794, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.413534641265869, "rewards_train/margins_1": 3.598962038755417, "rewards_train/margins_2": 3.7972564101219177, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -191.9835205078125, "logps_train/policy_1_l": -185.1768798828125, "logps_train/policy_1_w": -99.37477111816406, "logps_train/policy_2_2": -117.88886260986328, "logps_train/policy_2_w": -167.1822967529297, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -1.6688604354858398, "rewards_train/1-l": -2.825890302658081, "rewards_train/1-w": 2.7603743076324463, "rewards_train/2-2": 2.97556734085083, "rewards_train/2-w": -1.255730390548706, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.586264610290527, "rewards_train/margins_1": 4.429234743118286, "rewards_train/margins_2": 4.231297731399536, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -161.1344757080078, "logps_train/policy_1_l": -196.60137939453125, "logps_train/policy_1_w": -120.91165924072266, "logps_train/policy_2_2": -102.47479248046875, "logps_train/policy_2_w": -177.47930908203125, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.1251661777496338, "rewards_train/1-l": -3.520099401473999, "rewards_train/1-w": 2.629537582397461, "rewards_train/2-2": 2.4997873306274414, "rewards_train/2-w": -1.1565239429473877, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.14963698387146, "rewards_train/margins_1": 3.7547037601470947, "rewards_train/margins_2": 3.656311273574829, "step": 390 }, { "epoch": 1.17, "logps_train/policy_1_2": -188.48269653320312, "logps_train/policy_1_l": -223.31240844726562, "logps_train/policy_1_w": -121.14283752441406, "logps_train/policy_2_2": -117.6703109741211, "logps_train/policy_2_w": -202.47113037109375, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.4754183292388916, "rewards_train/1-l": -3.371767520904541, "rewards_train/1-w": 3.4629621505737305, "rewards_train/2-2": 3.1413674354553223, "rewards_train/2-w": -1.6236752271652222, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.8347296714782715, "rewards_train/margins_1": 4.938380479812622, "rewards_train/margins_2": 4.765042662620544, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -131.34344482421875, "logps_train/policy_1_l": -108.06364440917969, "logps_train/policy_1_w": -69.98519897460938, "logps_train/policy_2_2": -76.8636474609375, "logps_train/policy_2_w": -125.18496704101562, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -88.0, "logps_train/ref_2_2": -96.5, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": -1.2834649085998535, "rewards_train/1-l": -2.0442559719085693, "rewards_train/1-w": 1.793863296508789, "rewards_train/2-2": 1.9819945096969604, "rewards_train/2-w": -1.635391116142273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8381192684173584, "rewards_train/margins_1": 3.0773282051086426, "rewards_train/margins_2": 3.6173856258392334, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -194.72462463378906, "logps_train/policy_1_l": -177.50106811523438, "logps_train/policy_1_w": -114.51948547363281, "logps_train/policy_2_2": -115.2021484375, "logps_train/policy_2_w": -175.5707550048828, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -1.982618808746338, "rewards_train/1-l": -1.9899511337280273, "rewards_train/1-w": 2.488579034805298, "rewards_train/2-2": 3.0543932914733887, "rewards_train/2-w": -1.2115674018859863, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.478530168533325, "rewards_train/margins_1": 4.471197843551636, "rewards_train/margins_2": 4.265960693359375, "step": 391 }, { "epoch": 1.17, "logps_train/policy_1_2": -208.6177215576172, "logps_train/policy_1_l": -197.89308166503906, "logps_train/policy_1_w": -118.69312286376953, "logps_train/policy_2_2": -140.9627685546875, "logps_train/policy_2_w": -179.8712158203125, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.7012255191802979, "rewards_train/1-l": -2.691260814666748, "rewards_train/1-w": 2.5404534339904785, "rewards_train/2-2": 2.872082233428955, "rewards_train/2-w": -0.9265740513801575, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.231714248657227, "rewards_train/margins_1": 4.241678953170776, "rewards_train/margins_2": 3.7986562848091125, "step": 391 }, { "epoch": 1.17, "learning_rate": 2.009246631461129e-06, "loss": 0.6113, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -175.272705078125, "logps_train/policy_1_l": -180.45736694335938, "logps_train/policy_1_w": -134.59268188476562, "logps_train/policy_2_2": -116.12161254882812, "logps_train/policy_2_w": -208.84579467773438, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.456568717956543, "rewards_train/1-l": -2.3668301105499268, "rewards_train/1-w": 3.1817476749420166, "rewards_train/2-2": 2.5238733291625977, "rewards_train/2-w": -1.629502534866333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.548577785491943, "rewards_train/margins_1": 4.63831639289856, "rewards_train/margins_2": 4.153375864028931, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -225.1822052001953, "logps_train/policy_1_l": -191.5820770263672, "logps_train/policy_1_w": -116.42412567138672, "logps_train/policy_2_2": -143.55853271484375, "logps_train/policy_2_w": -180.91152954101562, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.605721354484558, "rewards_train/1-l": -2.416996479034424, "rewards_train/1-w": 2.9324164390563965, "rewards_train/2-2": 3.4554758071899414, "rewards_train/2-w": -1.1051170825958252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.34941291809082, "rewards_train/margins_1": 4.538137793540955, "rewards_train/margins_2": 4.560592889785767, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -175.40863037109375, "logps_train/policy_1_l": -155.7423095703125, "logps_train/policy_1_w": -129.91983032226562, "logps_train/policy_2_2": -113.53843688964844, "logps_train/policy_2_w": -204.30279541015625, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.3064885139465332, "rewards_train/1-l": -1.8711299896240234, "rewards_train/1-w": 3.191610336303711, "rewards_train/2-2": 2.537172317504883, "rewards_train/2-w": -1.4466865062713623, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.062740325927734, "rewards_train/margins_1": 4.498098850250244, "rewards_train/margins_2": 3.983858823776245, "step": 392 }, { "epoch": 1.17, "logps_train/policy_1_2": -179.46156311035156, "logps_train/policy_1_l": -198.3714141845703, "logps_train/policy_1_w": -141.87611389160156, "logps_train/policy_2_2": -110.87674713134766, "logps_train/policy_2_w": -215.2982177734375, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.341469645500183, "rewards_train/1-l": -2.5755198001861572, "rewards_train/1-w": 3.4577012062072754, "rewards_train/2-2": 2.9193572998046875, "rewards_train/2-w": -1.2235709428787231, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.033221006393433, "rewards_train/margins_1": 4.7991708517074585, "rewards_train/margins_2": 4.142928242683411, "step": 392 }, { "epoch": 1.18, "logps_train/policy_1_2": -162.32875061035156, "logps_train/policy_1_l": -159.2198944091797, "logps_train/policy_1_w": -92.65188598632812, "logps_train/policy_2_2": -103.521240234375, "logps_train/policy_2_w": -153.22006225585938, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.2277960777282715, "rewards_train/1-l": -2.529313802719116, "rewards_train/1-w": 2.4978976249694824, "rewards_train/2-2": 2.5541257858276367, "rewards_train/2-w": -1.2681012153625488, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.027211427688599, "rewards_train/margins_1": 3.725693702697754, "rewards_train/margins_2": 3.8222270011901855, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -227.71754455566406, "logps_train/policy_1_l": -171.27146911621094, "logps_train/policy_1_w": -139.27230834960938, "logps_train/policy_2_2": -154.15663146972656, "logps_train/policy_2_w": -221.3240966796875, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -2.0920660495758057, "rewards_train/1-l": -2.054295301437378, "rewards_train/1-w": 3.9281392097473145, "rewards_train/2-2": 2.8705673217773438, "rewards_train/2-w": -0.9656134843826294, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.982434511184692, "rewards_train/margins_1": 6.02020525932312, "rewards_train/margins_2": 3.836180806159973, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -129.338623046875, "logps_train/policy_1_l": -159.3688201904297, "logps_train/policy_1_w": -107.24095153808594, "logps_train/policy_2_2": -85.93256378173828, "logps_train/policy_2_w": -171.60693359375, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.6535885334014893, "rewards_train/1-l": -2.223527669906616, "rewards_train/1-w": 2.731348752975464, "rewards_train/2-2": 2.3085012435913086, "rewards_train/2-w": -1.3423811197280884, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.95487642288208, "rewards_train/margins_1": 3.384937286376953, "rewards_train/margins_2": 3.650882363319397, "step": 393 }, { "epoch": 1.18, "logps_train/policy_1_2": -233.45748901367188, "logps_train/policy_1_l": -195.93923950195312, "logps_train/policy_1_w": -147.25167846679688, "logps_train/policy_2_2": -148.68226623535156, "logps_train/policy_2_w": -231.67959594726562, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.8109817504882812, "rewards_train/1-l": -2.7079858779907227, "rewards_train/1-w": 3.5056920051574707, "rewards_train/2-2": 3.3697614669799805, "rewards_train/2-w": -1.3487197160720825, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.213677883148193, "rewards_train/margins_1": 5.316673755645752, "rewards_train/margins_2": 4.718481183052063, "step": 393 }, { "epoch": 1.18, "learning_rate": 1.9850534531472544e-06, "loss": 0.7789, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -188.40072631835938, "logps_train/policy_1_l": -215.28512573242188, "logps_train/policy_1_w": -155.87652587890625, "logps_train/policy_2_2": -133.54190063476562, "logps_train/policy_2_w": -222.06541442871094, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.9246925115585327, "rewards_train/1-l": -2.785836696624756, "rewards_train/1-w": 2.947844982147217, "rewards_train/2-2": 2.6819427013397217, "rewards_train/2-w": -0.9465804100036621, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 5.733681678771973, "rewards_train/margins_1": 3.8725374937057495, "rewards_train/margins_2": 3.628523111343384, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -194.77796936035156, "logps_train/policy_1_l": -199.85308837890625, "logps_train/policy_1_w": -151.97862243652344, "logps_train/policy_2_2": -130.69375610351562, "logps_train/policy_2_w": -222.84185791015625, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.4129527807235718, "rewards_train/1-l": -3.089801788330078, "rewards_train/1-w": 2.7197165489196777, "rewards_train/2-2": 2.843904972076416, "rewards_train/2-w": -1.7681721448898315, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.809518337249756, "rewards_train/margins_1": 4.1326693296432495, "rewards_train/margins_2": 4.612077116966248, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -272.3035888671875, "logps_train/policy_1_l": -256.92474365234375, "logps_train/policy_1_w": -191.89552307128906, "logps_train/policy_2_2": -190.58700561523438, "logps_train/policy_2_w": -297.2013854980469, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -243.0, "logps_train/ref_2_2": -230.0, "logps_train/ref_2_w": -282.0, "rewards_train/1-2": -1.1573134660720825, "rewards_train/1-l": -2.877434253692627, "rewards_train/1-w": 5.094821929931641, "rewards_train/2-2": 3.9880757331848145, "rewards_train/2-w": -1.5506088733673096, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.972256183624268, "rewards_train/margins_1": 6.252135396003723, "rewards_train/margins_2": 5.538684606552124, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -213.94461059570312, "logps_train/policy_1_l": -215.49197387695312, "logps_train/policy_1_w": -152.6506805419922, "logps_train/policy_2_2": -153.19815063476562, "logps_train/policy_2_w": -232.5315399169922, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -0.8022745847702026, "rewards_train/1-l": -2.349003791809082, "rewards_train/1-w": 3.495088577270508, "rewards_train/2-2": 2.936239242553711, "rewards_train/2-w": -1.6342074871063232, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.84409236907959, "rewards_train/margins_1": 4.2973631620407104, "rewards_train/margins_2": 4.570446729660034, "step": 394 }, { "epoch": 1.18, "logps_train/policy_1_2": -274.4900817871094, "logps_train/policy_1_l": -269.24560546875, "logps_train/policy_1_w": -169.42617797851562, "logps_train/policy_2_2": -181.0340576171875, "logps_train/policy_2_w": -258.6661071777344, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -223.0, "logps_train/ref_2_w": -239.0, "rewards_train/1-2": -1.4279154539108276, "rewards_train/1-l": -3.269972324371338, "rewards_train/1-w": 3.860116720199585, "rewards_train/2-2": 4.167688369750977, "rewards_train/2-w": -1.9748151302337646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.130089044570923, "rewards_train/margins_1": 5.288032174110413, "rewards_train/margins_2": 6.142503499984741, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -159.38412475585938, "logps_train/policy_1_l": -131.78915405273438, "logps_train/policy_1_w": -111.78840637207031, "logps_train/policy_2_2": -107.61652374267578, "logps_train/policy_2_w": -165.4339141845703, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -0.8766947388648987, "rewards_train/1-l": -1.8023529052734375, "rewards_train/1-w": 3.2555344104766846, "rewards_train/2-2": 2.5422539710998535, "rewards_train/2-w": -0.4019858241081238, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.057887315750122, "rewards_train/margins_1": 4.132229149341583, "rewards_train/margins_2": 2.9442397952079773, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -133.12472534179688, "logps_train/policy_1_l": -137.3911895751953, "logps_train/policy_1_w": -75.13821411132812, "logps_train/policy_2_2": -86.23405456542969, "logps_train/policy_2_w": -131.11819458007812, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -121.0, "rewards_train/1-2": -0.8421585559844971, "rewards_train/1-l": -1.877204418182373, "rewards_train/1-w": 2.344576835632324, "rewards_train/2-2": 2.2207353115081787, "rewards_train/2-w": -0.9864286184310913, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.221781253814697, "rewards_train/margins_1": 3.1867353916168213, "rewards_train/margins_2": 3.20716392993927, "step": 395 }, { "epoch": 1.18, "logps_train/policy_1_2": -206.95106506347656, "logps_train/policy_1_l": -175.13937377929688, "logps_train/policy_1_w": -121.93325805664062, "logps_train/policy_2_2": -145.43475341796875, "logps_train/policy_2_w": -185.1978759765625, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.420496940612793, "rewards_train/1-l": -2.2875704765319824, "rewards_train/1-w": 3.0574557781219482, "rewards_train/2-2": 2.459258794784546, "rewards_train/2-w": -0.9858015775680542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.345026254653931, "rewards_train/margins_1": 4.477952718734741, "rewards_train/margins_2": 3.4450603723526, "step": 395 }, { "epoch": 1.19, "learning_rate": 1.960910532708558e-06, "loss": 0.5711, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -188.79177856445312, "logps_train/policy_1_l": -150.09811401367188, "logps_train/policy_1_w": -109.62283325195312, "logps_train/policy_2_2": -118.2662353515625, "logps_train/policy_2_w": -181.70660400390625, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -2.010037660598755, "rewards_train/1-l": -2.40395188331604, "rewards_train/1-w": 3.000997543334961, "rewards_train/2-2": 2.63509464263916, "rewards_train/2-w": -1.5163629055023193, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.404949426651001, "rewards_train/margins_1": 5.011035203933716, "rewards_train/margins_2": 4.1514575481414795, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -235.72042846679688, "logps_train/policy_1_l": -175.16775512695312, "logps_train/policy_1_w": -180.43482971191406, "logps_train/policy_2_2": -155.27322387695312, "logps_train/policy_2_w": -294.16595458984375, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -244.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -290.0, "rewards_train/1-2": -1.8890830278396606, "rewards_train/1-l": -1.9523224830627441, "rewards_train/1-w": 6.309788227081299, "rewards_train/2-2": 3.345628023147583, "rewards_train/2-w": -0.41747355461120605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.262110710144043, "rewards_train/margins_1": 8.19887125492096, "rewards_train/margins_2": 3.763101577758789, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -188.39279174804688, "logps_train/policy_1_l": -195.24595642089844, "logps_train/policy_1_w": -105.51828002929688, "logps_train/policy_2_2": -122.19515991210938, "logps_train/policy_2_w": -178.31085205078125, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.5818580389022827, "rewards_train/1-l": -2.5396335124969482, "rewards_train/1-w": 2.721219062805176, "rewards_train/2-2": 2.8686680793762207, "rewards_train/2-w": -1.888506531715393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.260852575302124, "rewards_train/margins_1": 4.3030771017074585, "rewards_train/margins_2": 4.757174611091614, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -191.9580078125, "logps_train/policy_1_l": -192.52740478515625, "logps_train/policy_1_w": -168.55795288085938, "logps_train/policy_2_2": -143.64727783203125, "logps_train/policy_2_w": -224.40249633789062, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -0.39736324548721313, "rewards_train/1-l": -2.083012580871582, "rewards_train/1-w": 3.1326823234558105, "rewards_train/2-2": 2.844451427459717, "rewards_train/2-w": -0.4340001046657562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.215694904327393, "rewards_train/margins_1": 3.5300455689430237, "rewards_train/margins_2": 3.278451532125473, "step": 396 }, { "epoch": 1.19, "logps_train/policy_1_2": -182.9009552001953, "logps_train/policy_1_l": -149.9196319580078, "logps_train/policy_1_w": -99.216796875, "logps_train/policy_2_2": -115.49205017089844, "logps_train/policy_2_w": -155.6188507080078, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.701815128326416, "rewards_train/1-l": -2.010542631149292, "rewards_train/1-w": 2.592578172683716, "rewards_train/2-2": 2.5687644481658936, "rewards_train/2-w": -1.1864941120147705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.603120803833008, "rewards_train/margins_1": 4.294393301010132, "rewards_train/margins_2": 3.755258560180664, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -140.39378356933594, "logps_train/policy_1_l": -80.94768524169922, "logps_train/policy_1_w": -86.19017028808594, "logps_train/policy_2_2": -79.99942779541016, "logps_train/policy_2_w": -142.98355102539062, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -69.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -1.7874256372451782, "rewards_train/1-l": -1.1888477802276611, "rewards_train/1-w": 2.431044101715088, "rewards_train/2-2": 2.0756430625915527, "rewards_train/2-w": -1.2878694534301758, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.619891881942749, "rewards_train/margins_1": 4.218469738960266, "rewards_train/margins_2": 3.3635125160217285, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -131.82168579101562, "logps_train/policy_1_l": -113.10929870605469, "logps_train/policy_1_w": -82.82127380371094, "logps_train/policy_2_2": -85.13125610351562, "logps_train/policy_2_w": -138.28968811035156, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": -1.0821691751480103, "rewards_train/1-l": -1.9168384075164795, "rewards_train/1-w": 2.1701183319091797, "rewards_train/2-2": 2.109335422515869, "rewards_train/2-w": -1.43248450756073, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.086956739425659, "rewards_train/margins_1": 3.25228750705719, "rewards_train/margins_2": 3.541819930076599, "step": 397 }, { "epoch": 1.19, "logps_train/policy_1_2": -206.5978546142578, "logps_train/policy_1_l": -199.28451538085938, "logps_train/policy_1_w": -102.96279907226562, "logps_train/policy_2_2": -143.2042694091797, "logps_train/policy_2_w": -169.17115783691406, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.0965032577514648, "rewards_train/1-l": -2.6315770149230957, "rewards_train/1-w": 3.0972743034362793, "rewards_train/2-2": 2.8616039752960205, "rewards_train/2-w": -1.0327410697937012, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.728851318359375, "rewards_train/margins_1": 4.193777561187744, "rewards_train/margins_2": 3.8943450450897217, "step": 397 }, { "epoch": 1.19, "learning_rate": 1.9368202264515127e-06, "loss": 1.0096, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -157.72769165039062, "logps_train/policy_1_l": -159.05007934570312, "logps_train/policy_1_w": -124.06267547607422, "logps_train/policy_2_2": -94.79460906982422, "logps_train/policy_2_w": -193.20901489257812, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.6356606483459473, "rewards_train/1-l": -2.141920566558838, "rewards_train/1-w": 3.0243961811065674, "rewards_train/2-2": 2.1367504596710205, "rewards_train/2-w": -1.6354522705078125, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.166316747665405, "rewards_train/margins_1": 4.660056829452515, "rewards_train/margins_2": 3.772202730178833, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -239.24612426757812, "logps_train/policy_1_l": -223.10171508789062, "logps_train/policy_1_w": -149.518310546875, "logps_train/policy_2_2": -171.15982055664062, "logps_train/policy_2_w": -207.20901489257812, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -0.5519557595252991, "rewards_train/1-l": -2.6819496154785156, "rewards_train/1-w": 3.56516170501709, "rewards_train/2-2": 3.6277666091918945, "rewards_train/2-w": 0.005073875188827515, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 6.2471113204956055, "rewards_train/margins_1": 4.117117464542389, "rewards_train/margins_2": 3.622692734003067, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -188.5609130859375, "logps_train/policy_1_l": -199.04771423339844, "logps_train/policy_1_w": -148.88796997070312, "logps_train/policy_2_2": -138.51242065429688, "logps_train/policy_2_w": -206.75723266601562, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": -0.8090215921401978, "rewards_train/1-l": -2.434459686279297, "rewards_train/1-w": 3.1838583946228027, "rewards_train/2-2": 2.5945582389831543, "rewards_train/2-w": -0.5964267253875732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.6183180809021, "rewards_train/margins_1": 3.9928799867630005, "rewards_train/margins_2": 3.1909849643707275, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -185.29196166992188, "logps_train/policy_1_l": -227.49957275390625, "logps_train/policy_1_w": -142.36282348632812, "logps_train/policy_2_2": -130.47332763671875, "logps_train/policy_2_w": -201.35897827148438, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.8670858144760132, "rewards_train/1-l": -3.1333558559417725, "rewards_train/1-w": 2.9152801036834717, "rewards_train/2-2": 2.6710267066955566, "rewards_train/2-w": -1.0944910049438477, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.048635959625244, "rewards_train/margins_1": 3.782365918159485, "rewards_train/margins_2": 3.7655177116394043, "step": 398 }, { "epoch": 1.19, "logps_train/policy_1_2": -169.67501831054688, "logps_train/policy_1_l": -133.12490844726562, "logps_train/policy_1_w": -126.9898681640625, "logps_train/policy_2_2": -104.28734588623047, "logps_train/policy_2_w": -201.27752685546875, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.25929856300354, "rewards_train/1-l": -1.963273048400879, "rewards_train/1-w": 3.729919672012329, "rewards_train/2-2": 2.8892340660095215, "rewards_train/2-w": -1.1504101753234863, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.693192720413208, "rewards_train/margins_1": 4.989218235015869, "rewards_train/margins_2": 4.039644241333008, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -95.59480285644531, "logps_train/policy_1_l": -84.69425964355469, "logps_train/policy_1_w": -53.11843490600586, "logps_train/policy_2_2": -58.61467742919922, "logps_train/policy_2_w": -85.82369995117188, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -69.0, "logps_train/ref_1_w": -69.0, "logps_train/ref_2_2": -74.0, "logps_train/ref_2_w": -77.0, "rewards_train/1-2": -0.8054635524749756, "rewards_train/1-l": -1.581242561340332, "rewards_train/1-w": 1.5795629024505615, "rewards_train/2-2": 1.5664774179458618, "rewards_train/2-w": -0.8556123971939087, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.1608054637908936, "rewards_train/margins_1": 2.385026454925537, "rewards_train/margins_2": 2.4220898151397705, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -184.6644287109375, "logps_train/policy_1_l": -196.24630737304688, "logps_train/policy_1_w": -125.5884017944336, "logps_train/policy_2_2": -120.79042053222656, "logps_train/policy_2_w": -186.84439086914062, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.2851927280426025, "rewards_train/1-l": -2.414914846420288, "rewards_train/1-w": 2.7437474727630615, "rewards_train/2-2": 2.928673267364502, "rewards_train/2-w": -0.7384904623031616, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.15866231918335, "rewards_train/margins_1": 4.028940200805664, "rewards_train/margins_2": 3.6671637296676636, "step": 399 }, { "epoch": 1.19, "logps_train/policy_1_2": -168.31076049804688, "logps_train/policy_1_l": -147.10272216796875, "logps_train/policy_1_w": -110.66088104248047, "logps_train/policy_2_2": -111.73988342285156, "logps_train/policy_2_w": -172.9373321533203, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.9756069779396057, "rewards_train/1-l": -1.780901551246643, "rewards_train/1-w": 3.166334390640259, "rewards_train/2-2": 2.6560893058776855, "rewards_train/2-w": -0.9070145487785339, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.947235941886902, "rewards_train/margins_1": 4.1419413685798645, "rewards_train/margins_2": 3.5631038546562195, "step": 399 }, { "epoch": 1.2, "learning_rate": 1.912784885547541e-06, "loss": 0.7395, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -192.83633422851562, "logps_train/policy_1_l": -162.60226440429688, "logps_train/policy_1_w": -115.5942611694336, "logps_train/policy_2_2": -128.87648010253906, "logps_train/policy_2_w": -184.27706909179688, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.5597550272941589, "rewards_train/1-l": -2.1336164474487305, "rewards_train/1-w": 3.125046491622925, "rewards_train/2-2": 3.3402323722839355, "rewards_train/2-w": -1.1024624109268188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.258662939071655, "rewards_train/margins_1": 3.6848015189170837, "rewards_train/margins_2": 4.442694783210754, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -148.6756591796875, "logps_train/policy_1_l": -149.41265869140625, "logps_train/policy_1_w": -149.63624572753906, "logps_train/policy_2_2": -86.59419250488281, "logps_train/policy_2_w": -244.84130859375, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -1.6933482885360718, "rewards_train/1-l": -2.244096279144287, "rewards_train/1-w": 3.2846174240112305, "rewards_train/2-2": 2.188237190246582, "rewards_train/2-w": -2.4622554779052734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.528713703155518, "rewards_train/margins_1": 4.977965712547302, "rewards_train/margins_2": 4.6504926681518555, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -185.06488037109375, "logps_train/policy_1_l": -168.76271057128906, "logps_train/policy_1_w": -135.46827697753906, "logps_train/policy_2_2": -121.62599182128906, "logps_train/policy_2_w": -195.32534790039062, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.6764100790023804, "rewards_train/1-l": -2.3572769165039062, "rewards_train/1-w": 3.1984846591949463, "rewards_train/2-2": 3.5221662521362305, "rewards_train/2-w": -0.5692545771598816, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.5557615756988525, "rewards_train/margins_1": 3.8748947381973267, "rewards_train/margins_2": 4.091420829296112, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -206.56369018554688, "logps_train/policy_1_l": -163.77565002441406, "logps_train/policy_1_w": -149.1715087890625, "logps_train/policy_2_2": -134.94325256347656, "logps_train/policy_2_w": -217.1105499267578, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.2876189947128296, "rewards_train/1-l": -1.6862068176269531, "rewards_train/1-w": 3.525036334991455, "rewards_train/2-2": 3.4212634563446045, "rewards_train/2-w": -0.8743370771408081, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.211243152618408, "rewards_train/margins_1": 4.812655329704285, "rewards_train/margins_2": 4.295600533485413, "step": 400 }, { "epoch": 1.2, "logps_train/policy_1_2": -143.3292999267578, "logps_train/policy_1_l": -128.79579162597656, "logps_train/policy_1_w": -84.55519104003906, "logps_train/policy_2_2": -97.05038452148438, "logps_train/policy_2_w": -130.90115356445312, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": -0.9738489389419556, "rewards_train/1-l": -1.8490123748779297, "rewards_train/1-w": 2.3441638946533203, "rewards_train/2-2": 2.1464266777038574, "rewards_train/2-w": -0.6893339157104492, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.19317626953125, "rewards_train/margins_1": 3.318012833595276, "rewards_train/margins_2": 2.8357605934143066, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -85.03107452392578, "logps_train/policy_1_l": -88.86861419677734, "logps_train/policy_1_w": -85.62984466552734, "logps_train/policy_2_2": -56.112060546875, "logps_train/policy_2_w": -130.246337890625, "logps_train/ref_1_2": -79.0, "logps_train/ref_1_l": -72.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -69.5, "logps_train/ref_2_w": -120.0, "rewards_train/1-2": -0.5921698808670044, "rewards_train/1-l": -1.6288535594940186, "rewards_train/1-w": 2.0576212406158447, "rewards_train/2-2": 1.3190672397613525, "rewards_train/2-w": -1.0082274675369263, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.6864748001098633, "rewards_train/margins_1": 2.649791121482849, "rewards_train/margins_2": 2.327294707298279, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -113.50765991210938, "logps_train/policy_1_l": -141.03619384765625, "logps_train/policy_1_w": -98.41536712646484, "logps_train/policy_2_2": -78.63053894042969, "logps_train/policy_2_w": -146.76026916503906, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.46502384543418884, "rewards_train/1-l": -1.7224177122116089, "rewards_train/1-w": 2.672916889190674, "rewards_train/2-2": 1.7310869693756104, "rewards_train/2-w": -0.22680866718292236, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.395334601402283, "rewards_train/margins_1": 3.1379407346248627, "rewards_train/margins_2": 1.9578956365585327, "step": 401 }, { "epoch": 1.2, "logps_train/policy_1_2": -142.52549743652344, "logps_train/policy_1_l": -158.13272094726562, "logps_train/policy_1_w": -95.87680053710938, "logps_train/policy_2_2": -102.94850158691406, "logps_train/policy_2_w": -146.3857879638672, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.5497167706489563, "rewards_train/1-l": -2.166982650756836, "rewards_train/1-w": 2.406851053237915, "rewards_train/2-2": 2.2330799102783203, "rewards_train/2-w": -0.4774459898471832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.573833703994751, "rewards_train/margins_1": 2.9565678238868713, "rewards_train/margins_2": 2.7105259001255035, "step": 401 }, { "epoch": 1.2, "learning_rate": 1.8888068558035435e-06, "loss": 0.7062, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -147.804443359375, "logps_train/policy_1_l": -151.45114135742188, "logps_train/policy_1_w": -97.35720825195312, "logps_train/policy_2_2": -92.33380126953125, "logps_train/policy_2_w": -154.89439392089844, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -1.4562253952026367, "rewards_train/1-l": -2.550778865814209, "rewards_train/1-w": 2.6685760021209717, "rewards_train/2-2": 2.179119825363159, "rewards_train/2-w": -0.9886579513549805, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.219354867935181, "rewards_train/margins_1": 4.124801397323608, "rewards_train/margins_2": 3.1677777767181396, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -127.46607208251953, "logps_train/policy_1_l": -119.55194091796875, "logps_train/policy_1_w": -97.564453125, "logps_train/policy_2_2": -83.44891357421875, "logps_train/policy_2_w": -145.52207946777344, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -1.1148204803466797, "rewards_train/1-l": -1.58488130569458, "rewards_train/1-w": 2.0997555255889893, "rewards_train/2-2": 1.8643369674682617, "rewards_train/2-w": -1.2054309844970703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6846368312835693, "rewards_train/margins_1": 3.214576005935669, "rewards_train/margins_2": 3.069767951965332, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -128.47210693359375, "logps_train/policy_1_l": -107.08534240722656, "logps_train/policy_1_w": -83.38601684570312, "logps_train/policy_2_2": -85.89752197265625, "logps_train/policy_2_w": -133.9567108154297, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -0.5952572226524353, "rewards_train/1-l": -1.860975742340088, "rewards_train/1-w": 2.665694236755371, "rewards_train/2-2": 2.196575880050659, "rewards_train/2-w": -0.48238953948020935, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.526669979095459, "rewards_train/margins_1": 3.2609514594078064, "rewards_train/margins_2": 2.6789654195308685, "step": 402 }, { "epoch": 1.2, "logps_train/policy_1_2": -207.21859741210938, "logps_train/policy_1_l": -131.31173706054688, "logps_train/policy_1_w": -112.30567932128906, "logps_train/policy_2_2": -121.84602355957031, "logps_train/policy_2_w": -175.18096923828125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -2.241391181945801, "rewards_train/1-l": -1.7088587284088135, "rewards_train/1-w": 2.731541872024536, "rewards_train/2-2": 3.3294596672058105, "rewards_train/2-w": -1.5233694314956665, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.44040060043335, "rewards_train/margins_1": 4.972933053970337, "rewards_train/margins_2": 4.852829098701477, "step": 402 }, { "epoch": 1.21, "logps_train/policy_1_2": -168.16302490234375, "logps_train/policy_1_l": -152.3377685546875, "logps_train/policy_1_w": -99.65454864501953, "logps_train/policy_2_2": -111.96846008300781, "logps_train/policy_2_w": -161.10858154296875, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.5024348497390747, "rewards_train/1-l": -2.29972505569458, "rewards_train/1-w": 2.645549774169922, "rewards_train/2-2": 2.4545209407806396, "rewards_train/2-w": -1.4635920524597168, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.945274829864502, "rewards_train/margins_1": 4.147984623908997, "rewards_train/margins_2": 3.9181129932403564, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -248.31546020507812, "logps_train/policy_1_l": -199.37257385253906, "logps_train/policy_1_w": -164.30801391601562, "logps_train/policy_2_2": -172.58607482910156, "logps_train/policy_2_w": -228.04678344726562, "logps_train/ref_1_2": -233.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -1.5276395082473755, "rewards_train/1-l": -2.2398953437805176, "rewards_train/1-w": 2.9047458171844482, "rewards_train/2-2": 3.4152204990386963, "rewards_train/2-w": -1.2070205211639404, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.144641160964966, "rewards_train/margins_1": 4.432385325431824, "rewards_train/margins_2": 4.622241020202637, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -128.55712890625, "logps_train/policy_1_l": -78.92303466796875, "logps_train/policy_1_w": -86.34580993652344, "logps_train/policy_2_2": -77.87088775634766, "logps_train/policy_2_w": -143.85565185546875, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -70.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -0.8498535752296448, "rewards_train/1-l": -0.8958191275596619, "rewards_train/1-w": 2.445888042449951, "rewards_train/2-2": 2.271212100982666, "rewards_train/2-w": -1.34952974319458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.341707170009613, "rewards_train/margins_1": 3.295741617679596, "rewards_train/margins_2": 3.620741844177246, "step": 403 }, { "epoch": 1.21, "logps_train/policy_1_2": -225.77940368652344, "logps_train/policy_1_l": -163.50946044921875, "logps_train/policy_1_w": -141.1081085205078, "logps_train/policy_2_2": -154.4765625, "logps_train/policy_2_w": -219.56280517578125, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.4927845001220703, "rewards_train/1-l": -1.7644469738006592, "rewards_train/1-w": 3.828251361846924, "rewards_train/2-2": 3.520313262939453, "rewards_train/2-w": -1.2109678983688354, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.592698335647583, "rewards_train/margins_1": 5.321035861968994, "rewards_train/margins_2": 4.731281161308289, "step": 403 }, { "epoch": 1.21, "learning_rate": 1.8648884774329526e-06, "loss": 0.682, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -101.87430572509766, "logps_train/policy_1_l": -129.25015258789062, "logps_train/policy_1_w": -87.25296020507812, "logps_train/policy_2_2": -66.99183654785156, "logps_train/policy_2_w": -140.738525390625, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -0.5577431321144104, "rewards_train/1-l": -1.7451320886611938, "rewards_train/1-w": 2.7442357540130615, "rewards_train/2-2": 1.8490585088729858, "rewards_train/2-w": -0.781274139881134, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.489367842674255, "rewards_train/margins_1": 3.301978886127472, "rewards_train/margins_2": 2.63033264875412, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -179.11956787109375, "logps_train/policy_1_l": -152.92282104492188, "logps_train/policy_1_w": -93.69303894042969, "logps_train/policy_2_2": -115.18144226074219, "logps_train/policy_2_w": -149.86599731445312, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -1.3150830268859863, "rewards_train/1-l": -2.292867660522461, "rewards_train/1-w": 2.2563796043395996, "rewards_train/2-2": 2.981367588043213, "rewards_train/2-w": -1.2963664531707764, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.5492472648620605, "rewards_train/margins_1": 3.571462631225586, "rewards_train/margins_2": 4.277734041213989, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -196.4932861328125, "logps_train/policy_1_l": -165.13990783691406, "logps_train/policy_1_w": -149.8928680419922, "logps_train/policy_2_2": -151.7225799560547, "logps_train/policy_2_w": -193.40399169921875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": 0.08612202107906342, "rewards_train/1-l": -1.7453383207321167, "rewards_train/1-w": 3.437178373336792, "rewards_train/2-2": 3.0943429470062256, "rewards_train/2-w": 0.2695619463920593, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.182516694068909, "rewards_train/margins_1": 3.3510563522577286, "rewards_train/margins_2": 2.8247810006141663, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -194.172607421875, "logps_train/policy_1_l": -238.72872924804688, "logps_train/policy_1_w": -127.61012268066406, "logps_train/policy_2_2": -121.27381896972656, "logps_train/policy_2_w": -194.79757690429688, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -1.4981186389923096, "rewards_train/1-l": -3.0498263835906982, "rewards_train/1-w": 2.860081672668457, "rewards_train/2-2": 3.358947277069092, "rewards_train/2-w": -1.197140097618103, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.909908056259155, "rewards_train/margins_1": 4.358200311660767, "rewards_train/margins_2": 4.556087374687195, "step": 404 }, { "epoch": 1.21, "logps_train/policy_1_2": -243.33935546875, "logps_train/policy_1_l": -211.41146850585938, "logps_train/policy_1_w": -103.52467346191406, "logps_train/policy_2_2": -153.29759216308594, "logps_train/policy_2_w": -166.57540893554688, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -2.201122999191284, "rewards_train/1-l": -2.4550137519836426, "rewards_train/1-w": 2.9506583213806152, "rewards_train/2-2": 3.7053966522216797, "rewards_train/2-w": -0.7798060178756714, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.405672073364258, "rewards_train/margins_1": 5.151781320571899, "rewards_train/margins_2": 4.485202670097351, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -177.62351989746094, "logps_train/policy_1_l": -156.07981872558594, "logps_train/policy_1_w": -114.59381866455078, "logps_train/policy_2_2": -114.56185913085938, "logps_train/policy_2_w": -171.7802734375, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.3549302816390991, "rewards_train/1-l": -2.7378389835357666, "rewards_train/1-w": 2.571868419647217, "rewards_train/2-2": 3.007290840148926, "rewards_train/2-w": -1.1956050395965576, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.309707403182983, "rewards_train/margins_1": 3.926798701286316, "rewards_train/margins_2": 4.202895879745483, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -273.77447509765625, "logps_train/policy_1_l": -238.92544555664062, "logps_train/policy_1_w": -196.52574157714844, "logps_train/policy_2_2": -192.65310668945312, "logps_train/policy_2_w": -295.83526611328125, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -239.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -272.0, "rewards_train/1-2": -1.3559622764587402, "rewards_train/1-l": -2.940981388092041, "rewards_train/1-w": 4.263441562652588, "rewards_train/2-2": 3.853098154067993, "rewards_train/2-w": -2.3421196937561035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.204422950744629, "rewards_train/margins_1": 5.619403839111328, "rewards_train/margins_2": 6.195217847824097, "step": 405 }, { "epoch": 1.21, "logps_train/policy_1_2": -211.02288818359375, "logps_train/policy_1_l": -198.0130157470703, "logps_train/policy_1_w": -124.59895324707031, "logps_train/policy_2_2": -139.2869873046875, "logps_train/policy_2_w": -197.6988067626953, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.1175236701965332, "rewards_train/1-l": -2.1639962196350098, "rewards_train/1-w": 3.2045581340789795, "rewards_train/2-2": 3.1765754222869873, "rewards_train/2-w": -0.9663652181625366, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.368554353713989, "rewards_train/margins_1": 4.322081804275513, "rewards_train/margins_2": 4.142940640449524, "step": 405 }, { "epoch": 1.22, "learning_rate": 1.8410320848273315e-06, "loss": 0.5652, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -205.36746215820312, "logps_train/policy_1_l": -184.58511352539062, "logps_train/policy_1_w": -126.03788757324219, "logps_train/policy_2_2": -133.9895782470703, "logps_train/policy_2_w": -194.769287109375, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -0.9213165044784546, "rewards_train/1-l": -1.8865625858306885, "rewards_train/1-w": 2.8606648445129395, "rewards_train/2-2": 3.8231120109558105, "rewards_train/2-w": -1.2585694789886475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.747227430343628, "rewards_train/margins_1": 3.781981348991394, "rewards_train/margins_2": 5.081681489944458, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -199.54173278808594, "logps_train/policy_1_l": -173.75509643554688, "logps_train/policy_1_w": -167.489501953125, "logps_train/policy_2_2": -129.72027587890625, "logps_train/policy_2_w": -246.5752410888672, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -236.0, "rewards_train/1-2": -0.8499438762664795, "rewards_train/1-l": -1.9740625619888306, "rewards_train/1-w": 4.03542423248291, "rewards_train/2-2": 3.693939447402954, "rewards_train/2-w": -1.058694839477539, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.009486794471741, "rewards_train/margins_1": 4.88536810874939, "rewards_train/margins_2": 4.752634286880493, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -167.00689697265625, "logps_train/policy_1_l": -170.59654235839844, "logps_train/policy_1_w": -124.0176773071289, "logps_train/policy_2_2": -111.6416015625, "logps_train/policy_2_w": -182.07177734375, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.0461976528167725, "rewards_train/1-l": -2.1455113887786865, "rewards_train/1-w": 2.768935441970825, "rewards_train/2-2": 2.488086462020874, "rewards_train/2-w": -0.7652837038040161, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.914446830749512, "rewards_train/margins_1": 3.8151330947875977, "rewards_train/margins_2": 3.25337016582489, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -110.21286010742188, "logps_train/policy_1_l": -174.18357849121094, "logps_train/policy_1_w": -93.2445297241211, "logps_train/policy_2_2": -69.96759033203125, "logps_train/policy_2_w": -142.47325134277344, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.6306607723236084, "rewards_train/1-l": -2.2242166996002197, "rewards_train/1-w": 2.492734432220459, "rewards_train/2-2": 2.0411314964294434, "rewards_train/2-w": -0.7895121574401855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.716951131820679, "rewards_train/margins_1": 3.1233952045440674, "rewards_train/margins_2": 2.830643653869629, "step": 406 }, { "epoch": 1.22, "logps_train/policy_1_2": -140.93307495117188, "logps_train/policy_1_l": -108.58914184570312, "logps_train/policy_1_w": -84.51054382324219, "logps_train/policy_2_2": -82.7193603515625, "logps_train/policy_2_w": -132.31695556640625, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": -1.4577603340148926, "rewards_train/1-l": -2.0073506832122803, "rewards_train/1-w": 1.8703320026397705, "rewards_train/2-2": 2.4114623069763184, "rewards_train/2-w": -1.171539068222046, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.877682685852051, "rewards_train/margins_1": 3.328092336654663, "rewards_train/margins_2": 3.5830013751983643, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -219.39364624023438, "logps_train/policy_1_l": -222.4512939453125, "logps_train/policy_1_w": -174.4019012451172, "logps_train/policy_2_2": -154.4083709716797, "logps_train/policy_2_w": -247.52182006835938, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -199.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -233.0, "rewards_train/1-2": -1.181553602218628, "rewards_train/1-l": -2.331652879714966, "rewards_train/1-w": 3.320747137069702, "rewards_train/2-2": 2.937678813934326, "rewards_train/2-w": -1.4537453651428223, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.652400016784668, "rewards_train/margins_1": 4.50230073928833, "rewards_train/margins_2": 4.391424179077148, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -190.98976135253906, "logps_train/policy_1_l": -193.89468383789062, "logps_train/policy_1_w": -115.17150115966797, "logps_train/policy_2_2": -120.201171875, "logps_train/policy_2_w": -189.778076171875, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.5671391487121582, "rewards_train/1-l": -2.1859748363494873, "rewards_train/1-w": 2.768787384033203, "rewards_train/2-2": 2.749560832977295, "rewards_train/2-w": -1.6826180219650269, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.95476222038269, "rewards_train/margins_1": 4.335926532745361, "rewards_train/margins_2": 4.432178854942322, "step": 407 }, { "epoch": 1.22, "logps_train/policy_1_2": -173.37353515625, "logps_train/policy_1_l": -206.43023681640625, "logps_train/policy_1_w": -105.46917724609375, "logps_train/policy_2_2": -109.92765808105469, "logps_train/policy_2_w": -167.0980224609375, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -1.6682127714157104, "rewards_train/1-l": -3.2318902015686035, "rewards_train/1-w": 2.012603282928467, "rewards_train/2-2": 2.495905876159668, "rewards_train/2-w": -1.7281607389450073, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.24449348449707, "rewards_train/margins_1": 3.6808160543441772, "rewards_train/margins_2": 4.224066615104675, "step": 407 }, { "epoch": 1.22, "learning_rate": 1.8172400063285423e-06, "loss": 0.7078, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -151.30068969726562, "logps_train/policy_1_l": -168.80126953125, "logps_train/policy_1_w": -127.34933471679688, "logps_train/policy_2_2": -95.25526428222656, "logps_train/policy_2_w": -206.24563598632812, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -1.549112319946289, "rewards_train/1-l": -2.5272960662841797, "rewards_train/1-w": 2.9127230644226074, "rewards_train/2-2": 2.0913197994232178, "rewards_train/2-w": -2.094679355621338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.440019130706787, "rewards_train/margins_1": 4.4618353843688965, "rewards_train/margins_2": 4.185999155044556, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -228.4925537109375, "logps_train/policy_1_l": -244.37261962890625, "logps_train/policy_1_w": -123.41980743408203, "logps_train/policy_2_2": -146.76898193359375, "logps_train/policy_2_w": -202.99765014648438, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.546032428741455, "rewards_train/1-l": -2.638824462890625, "rewards_train/1-w": 3.3806262016296387, "rewards_train/2-2": 3.281867504119873, "rewards_train/2-w": -1.6501548290252686, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.019450664520264, "rewards_train/margins_1": 4.926658630371094, "rewards_train/margins_2": 4.932022333145142, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -135.52639770507812, "logps_train/policy_1_l": -84.32764434814453, "logps_train/policy_1_w": -55.67737579345703, "logps_train/policy_2_2": -94.19984436035156, "logps_train/policy_2_w": -98.52107238769531, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -73.0, "logps_train/ref_1_w": -77.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -90.5, "rewards_train/1-2": -0.46465206146240234, "rewards_train/1-l": -1.1102056503295898, "rewards_train/1-w": 2.1293327808380127, "rewards_train/2-2": 2.2293689250946045, "rewards_train/2-w": -0.8279372453689575, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.2395384311676025, "rewards_train/margins_1": 2.593984842300415, "rewards_train/margins_2": 3.057306170463562, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -191.3404541015625, "logps_train/policy_1_l": -181.87681579589844, "logps_train/policy_1_w": -121.61537170410156, "logps_train/policy_2_2": -123.6205825805664, "logps_train/policy_2_w": -215.2661895751953, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.2465453147888184, "rewards_train/1-l": -2.0236806869506836, "rewards_train/1-w": 3.6146345138549805, "rewards_train/2-2": 3.2855005264282227, "rewards_train/2-w": -2.4231035709381104, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.638315200805664, "rewards_train/margins_1": 4.861179828643799, "rewards_train/margins_2": 5.708604097366333, "step": 408 }, { "epoch": 1.22, "logps_train/policy_1_2": -180.26303100585938, "logps_train/policy_1_l": -166.09368896484375, "logps_train/policy_1_w": -105.3993148803711, "logps_train/policy_2_2": -112.58645629882812, "logps_train/policy_2_w": -158.87567138671875, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -1.1610676050186157, "rewards_train/1-l": -2.2833189964294434, "rewards_train/1-w": 2.696396827697754, "rewards_train/2-2": 3.30346417427063, "rewards_train/2-w": -0.643425703048706, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.979715824127197, "rewards_train/margins_1": 3.8574644327163696, "rewards_train/margins_2": 3.946889877319336, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -193.4268035888672, "logps_train/policy_1_l": -263.66363525390625, "logps_train/policy_1_w": -148.9109344482422, "logps_train/policy_2_2": -140.9019775390625, "logps_train/policy_2_w": -212.58917236328125, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -237.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -0.7844781875610352, "rewards_train/1-l": -2.6218326091766357, "rewards_train/1-w": 3.1276564598083496, "rewards_train/2-2": 2.703552722930908, "rewards_train/2-w": -0.8018858432769775, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.749489068984985, "rewards_train/margins_1": 3.9121346473693848, "rewards_train/margins_2": 3.5054385662078857, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -146.16734313964844, "logps_train/policy_1_l": -152.1533660888672, "logps_train/policy_1_w": -143.0611114501953, "logps_train/policy_2_2": -96.86908721923828, "logps_train/policy_2_w": -221.3738555908203, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -209.0, "rewards_train/1-2": -0.37544968724250793, "rewards_train/1-l": -1.510453701019287, "rewards_train/1-w": 3.6450610160827637, "rewards_train/2-2": 2.4672417640686035, "rewards_train/2-w": -1.2645344734191895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.155514717102051, "rewards_train/margins_1": 4.020510703325272, "rewards_train/margins_2": 3.731776237487793, "step": 409 }, { "epoch": 1.22, "logps_train/policy_1_2": -173.7436981201172, "logps_train/policy_1_l": -138.38844299316406, "logps_train/policy_1_w": -122.21757507324219, "logps_train/policy_2_2": -116.54798126220703, "logps_train/policy_2_w": -174.62918090820312, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.9713430404663086, "rewards_train/1-l": -1.9341567754745483, "rewards_train/1-w": 3.147383451461792, "rewards_train/2-2": 2.647155284881592, "rewards_train/2-w": -0.4719030261039734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.08154022693634, "rewards_train/margins_1": 4.118726491928101, "rewards_train/margins_2": 3.119058310985565, "step": 409 }, { "epoch": 1.23, "learning_rate": 1.793514564001503e-06, "loss": 0.8248, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -148.75723266601562, "logps_train/policy_1_l": -147.16439819335938, "logps_train/policy_1_w": -102.53556823730469, "logps_train/policy_2_2": -90.13885498046875, "logps_train/policy_2_w": -184.58401489257812, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.4210350513458252, "rewards_train/1-l": -1.7928067445755005, "rewards_train/1-w": 2.917536735534668, "rewards_train/2-2": 2.3177554607391357, "rewards_train/2-w": -2.4505882263183594, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7103434801101685, "rewards_train/margins_1": 4.338571786880493, "rewards_train/margins_2": 4.768343687057495, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -220.45228576660156, "logps_train/policy_1_l": -195.03526306152344, "logps_train/policy_1_w": -134.6362762451172, "logps_train/policy_2_2": -157.5748291015625, "logps_train/policy_2_w": -188.42575073242188, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.9903452396392822, "rewards_train/1-l": -2.417785167694092, "rewards_train/1-w": 2.806685447692871, "rewards_train/2-2": 3.2581419944763184, "rewards_train/2-w": -0.5351533889770508, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.224470615386963, "rewards_train/margins_1": 3.7970306873321533, "rewards_train/margins_2": 3.793295383453369, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -128.35736083984375, "logps_train/policy_1_l": -63.28919982910156, "logps_train/policy_1_w": -64.24284362792969, "logps_train/policy_2_2": -79.83010864257812, "logps_train/policy_2_w": -102.48193359375, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -54.25, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": -0.9754832983016968, "rewards_train/1-l": -0.8901505470275879, "rewards_train/1-w": 2.1114583015441895, "rewards_train/2-2": 2.2226529121398926, "rewards_train/2-w": -0.49292001128196716, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.0016088485717773, "rewards_train/margins_1": 3.0869415998458862, "rewards_train/margins_2": 2.7155729234218597, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -174.79139709472656, "logps_train/policy_1_l": -162.53060913085938, "logps_train/policy_1_w": -103.97738647460938, "logps_train/policy_2_2": -107.57984924316406, "logps_train/policy_2_w": -163.50582885742188, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.4172253608703613, "rewards_train/1-l": -2.6080398559570312, "rewards_train/1-w": 2.74991774559021, "rewards_train/2-2": 2.818967819213867, "rewards_train/2-w": -1.5726526975631714, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.357957601547241, "rewards_train/margins_1": 4.167143106460571, "rewards_train/margins_2": 4.391620516777039, "step": 410 }, { "epoch": 1.23, "logps_train/policy_1_2": -134.55862426757812, "logps_train/policy_1_l": -115.84180450439453, "logps_train/policy_1_w": -91.36349487304688, "logps_train/policy_2_2": -77.49610900878906, "logps_train/policy_2_w": -145.76585388183594, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -96.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -1.7242231369018555, "rewards_train/1-l": -1.6733894348144531, "rewards_train/1-w": 2.2862091064453125, "rewards_train/2-2": 1.911717414855957, "rewards_train/2-w": -1.2027571201324463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9595985412597656, "rewards_train/margins_1": 4.010432243347168, "rewards_train/margins_2": 3.1144745349884033, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -139.55593872070312, "logps_train/policy_1_l": -118.35675811767578, "logps_train/policy_1_w": -87.88350677490234, "logps_train/policy_2_2": -85.77023315429688, "logps_train/policy_2_w": -151.72119140625, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.3637964725494385, "rewards_train/1-l": -1.4813785552978516, "rewards_train/1-w": 2.331204891204834, "rewards_train/2-2": 2.1651644706726074, "rewards_train/2-w": -1.4098155498504639, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8125834465026855, "rewards_train/margins_1": 3.6950013637542725, "rewards_train/margins_2": 3.5749800205230713, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -201.39170837402344, "logps_train/policy_1_l": -217.01486206054688, "logps_train/policy_1_w": -153.73599243164062, "logps_train/policy_2_2": -133.5509490966797, "logps_train/policy_2_w": -240.0550079345703, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.232530117034912, "rewards_train/1-l": -3.042109966278076, "rewards_train/1-w": 4.491243362426758, "rewards_train/2-2": 3.392561197280884, "rewards_train/2-w": -1.4289369583129883, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 7.533353328704834, "rewards_train/margins_1": 5.72377347946167, "rewards_train/margins_2": 4.821498155593872, "step": 411 }, { "epoch": 1.23, "logps_train/policy_1_2": -180.33297729492188, "logps_train/policy_1_l": -165.18385314941406, "logps_train/policy_1_w": -163.8378143310547, "logps_train/policy_2_2": -108.2315902709961, "logps_train/policy_2_w": -240.52755737304688, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -1.1255823373794556, "rewards_train/1-l": -2.078052520751953, "rewards_train/1-w": 2.8073325157165527, "rewards_train/2-2": 3.4307475090026855, "rewards_train/2-w": -2.1734089851379395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.885385036468506, "rewards_train/margins_1": 3.9329148530960083, "rewards_train/margins_2": 5.604156494140625, "step": 411 }, { "epoch": 1.23, "learning_rate": 1.769858073407561e-06, "loss": 0.8055, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -154.81227111816406, "logps_train/policy_1_l": -157.5860595703125, "logps_train/policy_1_w": -93.08937072753906, "logps_train/policy_2_2": -100.10638427734375, "logps_train/policy_2_w": -140.12344360351562, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -1.125075340270996, "rewards_train/1-l": -2.2635865211486816, "rewards_train/1-w": 2.483250141143799, "rewards_train/2-2": 2.387603282928467, "rewards_train/2-w": -0.5918360948562622, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.7468366622924805, "rewards_train/margins_1": 3.608325481414795, "rewards_train/margins_2": 2.979439377784729, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -167.20843505859375, "logps_train/policy_1_l": -149.9715576171875, "logps_train/policy_1_w": -88.24240112304688, "logps_train/policy_2_2": -103.24012756347656, "logps_train/policy_2_w": -146.0801544189453, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -1.4942805767059326, "rewards_train/1-l": -1.361547589302063, "rewards_train/1-w": 2.6823997497558594, "rewards_train/2-2": 2.4470813274383545, "rewards_train/2-w": -1.084578037261963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.043947339057922, "rewards_train/margins_1": 4.176680326461792, "rewards_train/margins_2": 3.5316593647003174, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -190.95855712890625, "logps_train/policy_1_l": -150.80813598632812, "logps_train/policy_1_w": -111.54594421386719, "logps_train/policy_2_2": -135.52488708496094, "logps_train/policy_2_w": -170.53866577148438, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.8540587425231934, "rewards_train/1-l": -1.628275752067566, "rewards_train/1-w": 2.491694450378418, "rewards_train/2-2": 2.7235255241394043, "rewards_train/2-w": -1.2202733755111694, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.119970202445984, "rewards_train/margins_1": 3.3457531929016113, "rewards_train/margins_2": 3.9437988996505737, "step": 412 }, { "epoch": 1.23, "logps_train/policy_1_2": -203.05584716796875, "logps_train/policy_1_l": -205.4900360107422, "logps_train/policy_1_w": -125.5721664428711, "logps_train/policy_2_2": -134.7877655029297, "logps_train/policy_2_w": -185.29714965820312, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.6227729320526123, "rewards_train/1-l": -2.7327933311462402, "rewards_train/1-w": 2.884580135345459, "rewards_train/2-2": 3.1675119400024414, "rewards_train/2-w": -1.1351834535598755, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.617373466491699, "rewards_train/margins_1": 4.507353067398071, "rewards_train/margins_2": 4.302695393562317, "step": 412 }, { "epoch": 1.24, "logps_train/policy_1_2": -145.07559204101562, "logps_train/policy_1_l": -142.04458618164062, "logps_train/policy_1_w": -84.07772827148438, "logps_train/policy_2_2": -90.23345184326172, "logps_train/policy_2_w": -140.64573669433594, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -1.6806069612503052, "rewards_train/1-l": -1.6503573656082153, "rewards_train/1-w": 2.126211166381836, "rewards_train/2-2": 2.0512640476226807, "rewards_train/2-w": -1.3516829013824463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7765685319900513, "rewards_train/margins_1": 3.806818127632141, "rewards_train/margins_2": 3.402946949005127, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -151.70767211914062, "logps_train/policy_1_l": -95.00492095947266, "logps_train/policy_1_w": -88.87568664550781, "logps_train/policy_2_2": -103.36744689941406, "logps_train/policy_2_w": -148.4453887939453, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.5836583971977234, "rewards_train/1-l": -1.4211030006408691, "rewards_train/1-w": 2.514970302581787, "rewards_train/2-2": 2.604271173477173, "rewards_train/2-w": -1.399519681930542, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9360733032226562, "rewards_train/margins_1": 3.0986286997795105, "rewards_train/margins_2": 4.003790855407715, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -191.5176239013672, "logps_train/policy_1_l": -208.76712036132812, "logps_train/policy_1_w": -124.18759155273438, "logps_train/policy_2_2": -112.29917907714844, "logps_train/policy_2_w": -197.28482055664062, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -1.910649299621582, "rewards_train/1-l": -1.945901870727539, "rewards_train/1-w": 2.9121971130371094, "rewards_train/2-2": 2.7991344928741455, "rewards_train/2-w": -1.4299471378326416, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.858098983764648, "rewards_train/margins_1": 4.822846412658691, "rewards_train/margins_2": 4.229081630706787, "step": 413 }, { "epoch": 1.24, "logps_train/policy_1_2": -287.1434020996094, "logps_train/policy_1_l": -293.4872741699219, "logps_train/policy_1_w": -168.385009765625, "logps_train/policy_2_2": -192.9651336669922, "logps_train/policy_2_w": -241.66165161132812, "logps_train/ref_1_2": -268.0, "logps_train/ref_1_l": -264.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -236.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.940119981765747, "rewards_train/1-l": -2.872800350189209, "rewards_train/1-w": 3.147632598876953, "rewards_train/2-2": 4.2566118240356445, "rewards_train/2-w": -1.5823767185211182, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.020432949066162, "rewards_train/margins_1": 5.0877525806427, "rewards_train/margins_2": 5.838988542556763, "step": 413 }, { "epoch": 1.24, "learning_rate": 1.746272843378493e-06, "loss": 0.637, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -206.65350341796875, "logps_train/policy_1_l": -159.09323120117188, "logps_train/policy_1_w": -146.62188720703125, "logps_train/policy_2_2": -143.84152221679688, "logps_train/policy_2_w": -248.08575439453125, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": -0.7512865662574768, "rewards_train/1-l": -1.638230562210083, "rewards_train/1-w": 4.75031042098999, "rewards_train/2-2": 3.3174099922180176, "rewards_train/2-w": -1.6273260116577148, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.388540983200073, "rewards_train/margins_1": 5.501596987247467, "rewards_train/margins_2": 4.944736003875732, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -179.9068603515625, "logps_train/policy_1_l": -175.34152221679688, "logps_train/policy_1_w": -147.87142944335938, "logps_train/policy_2_2": -114.04452514648438, "logps_train/policy_2_w": -218.202392578125, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.25396728515625, "rewards_train/1-l": -2.1129860877990723, "rewards_train/1-w": 3.248013973236084, "rewards_train/2-2": 2.9959378242492676, "rewards_train/2-w": -1.2268800735473633, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.361000061035156, "rewards_train/margins_1": 4.501981258392334, "rewards_train/margins_2": 4.222817897796631, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -128.34945678710938, "logps_train/policy_1_l": -141.96107482910156, "logps_train/policy_1_w": -123.638427734375, "logps_train/policy_2_2": -70.60926055908203, "logps_train/policy_2_w": -205.33856201171875, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.0677584409713745, "rewards_train/1-l": -2.1275525093078613, "rewards_train/1-w": 3.337524652481079, "rewards_train/2-2": 2.255870819091797, "rewards_train/2-w": -1.5377618074417114, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.46507716178894, "rewards_train/margins_1": 4.405283093452454, "rewards_train/margins_2": 3.7936326265335083, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -218.15501403808594, "logps_train/policy_1_l": -224.93167114257812, "logps_train/policy_1_w": -164.82464599609375, "logps_train/policy_2_2": -149.5911865234375, "logps_train/policy_2_w": -259.1162414550781, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": -1.309250831604004, "rewards_train/1-l": -2.4425806999206543, "rewards_train/1-w": 3.7159721851348877, "rewards_train/2-2": 2.943225145339966, "rewards_train/2-w": -1.930374026298523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.158552885055542, "rewards_train/margins_1": 5.025223016738892, "rewards_train/margins_2": 4.873599171638489, "step": 414 }, { "epoch": 1.24, "logps_train/policy_1_2": -152.94906616210938, "logps_train/policy_1_l": -162.01046752929688, "logps_train/policy_1_w": -119.83808898925781, "logps_train/policy_2_2": -91.68423461914062, "logps_train/policy_2_w": -193.16998291015625, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.3411955833435059, "rewards_train/1-l": -2.5005574226379395, "rewards_train/1-w": 3.0341598987579346, "rewards_train/2-2": 2.853060722351074, "rewards_train/2-w": -1.6763719320297241, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.534717321395874, "rewards_train/margins_1": 4.37535548210144, "rewards_train/margins_2": 4.529432654380798, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -152.85464477539062, "logps_train/policy_1_l": -168.04592895507812, "logps_train/policy_1_w": -127.53470611572266, "logps_train/policy_2_2": -90.0410385131836, "logps_train/policy_2_w": -212.9118194580078, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.6136873960494995, "rewards_train/1-l": -2.282522201538086, "rewards_train/1-w": 3.190767765045166, "rewards_train/2-2": 2.1666970252990723, "rewards_train/2-w": -2.303290843963623, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.473289966583252, "rewards_train/margins_1": 4.8044551610946655, "rewards_train/margins_2": 4.469987869262695, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -166.62738037109375, "logps_train/policy_1_l": -158.38937377929688, "logps_train/policy_1_w": -127.88099670410156, "logps_train/policy_2_2": -112.16741943359375, "logps_train/policy_2_w": -196.0756072998047, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.278362512588501, "rewards_train/1-l": -2.4062209129333496, "rewards_train/1-w": 2.8583858013153076, "rewards_train/2-2": 2.5570874214172363, "rewards_train/2-w": -1.204435110092163, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.264606714248657, "rewards_train/margins_1": 4.136748313903809, "rewards_train/margins_2": 3.7615225315093994, "step": 415 }, { "epoch": 1.24, "logps_train/policy_1_2": -270.7916259765625, "logps_train/policy_1_l": -204.63482666015625, "logps_train/policy_1_w": -158.94073486328125, "logps_train/policy_2_2": -176.7825927734375, "logps_train/policy_2_w": -248.49411010742188, "logps_train/ref_1_2": -249.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -216.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -2.174865245819092, "rewards_train/1-l": -1.97520112991333, "rewards_train/1-w": 3.730926990509033, "rewards_train/2-2": 3.881115436553955, "rewards_train/2-w": -1.9470669031143188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.706128120422363, "rewards_train/margins_1": 5.905792236328125, "rewards_train/margins_2": 5.828182339668274, "step": 415 }, { "epoch": 1.25, "learning_rate": 1.7227611757911721e-06, "loss": 0.4749, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -127.98839569091797, "logps_train/policy_1_l": -109.76719665527344, "logps_train/policy_1_w": -60.81465148925781, "logps_train/policy_2_2": -83.1871337890625, "logps_train/policy_2_w": -102.76168060302734, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": -0.9409776926040649, "rewards_train/1-l": -2.073594331741333, "rewards_train/1-w": 2.335331916809082, "rewards_train/2-2": 2.1375370025634766, "rewards_train/2-w": -0.5051476359367371, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.408926248550415, "rewards_train/margins_1": 3.276309609413147, "rewards_train/margins_2": 2.6426846385002136, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -235.95333862304688, "logps_train/policy_1_l": -220.47772216796875, "logps_train/policy_1_w": -114.35420989990234, "logps_train/policy_2_2": -149.4386749267578, "logps_train/policy_2_w": -186.73321533203125, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -2.269944906234741, "rewards_train/1-l": -2.6964049339294434, "rewards_train/1-w": 2.569608449935913, "rewards_train/2-2": 3.2275195121765137, "rewards_train/2-w": -1.8634580373764038, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.2660133838653564, "rewards_train/margins_1": 4.839553356170654, "rewards_train/margins_2": 5.0909775495529175, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -182.72378540039062, "logps_train/policy_1_l": -204.8233642578125, "logps_train/policy_1_w": -122.04219055175781, "logps_train/policy_2_2": -127.56664276123047, "logps_train/policy_2_w": -184.67831420898438, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.7147611975669861, "rewards_train/1-l": -3.172767400741577, "rewards_train/1-w": 3.036015510559082, "rewards_train/2-2": 2.6595468521118164, "rewards_train/2-w": -1.0065040588378906, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.208782911300659, "rewards_train/margins_1": 3.750776708126068, "rewards_train/margins_2": 3.666050910949707, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -172.76760864257812, "logps_train/policy_1_l": -185.96615600585938, "logps_train/policy_1_w": -144.2662353515625, "logps_train/policy_2_2": -122.6658935546875, "logps_train/policy_2_w": -204.32737731933594, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.43301206827163696, "rewards_train/1-l": -2.742903709411621, "rewards_train/1-w": 2.998377799987793, "rewards_train/2-2": 2.706798553466797, "rewards_train/2-w": -1.2683331966400146, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.741281509399414, "rewards_train/margins_1": 3.43138986825943, "rewards_train/margins_2": 3.9751317501068115, "step": 416 }, { "epoch": 1.25, "logps_train/policy_1_2": -241.79400634765625, "logps_train/policy_1_l": -162.57351684570312, "logps_train/policy_1_w": -114.83042907714844, "logps_train/policy_2_2": -146.6717071533203, "logps_train/policy_2_w": -187.51846313476562, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -2.3200252056121826, "rewards_train/1-l": -1.8420867919921875, "rewards_train/1-w": 2.7228164672851562, "rewards_train/2-2": 3.677750825881958, "rewards_train/2-w": -1.7710343599319458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.564903259277344, "rewards_train/margins_1": 5.042841672897339, "rewards_train/margins_2": 5.448785185813904, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -125.2767333984375, "logps_train/policy_1_l": -123.13762664794922, "logps_train/policy_1_w": -81.21073913574219, "logps_train/policy_2_2": -76.01069641113281, "logps_train/policy_2_w": -142.07676696777344, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -98.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -0.9923219680786133, "rewards_train/1-l": -1.6990418434143066, "rewards_train/1-w": 2.4797558784484863, "rewards_train/2-2": 2.2122116088867188, "rewards_train/2-w": -1.2313095331192017, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.178797721862793, "rewards_train/margins_1": 3.4720778465270996, "rewards_train/margins_2": 3.4435211420059204, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -188.54147338867188, "logps_train/policy_1_l": -135.41983032226562, "logps_train/policy_1_w": -77.42318725585938, "logps_train/policy_2_2": -111.38249206542969, "logps_train/policy_2_w": -135.37997436523438, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": -2.151412010192871, "rewards_train/1-l": -2.3363184928894043, "rewards_train/1-w": 2.200051784515381, "rewards_train/2-2": 2.8555002212524414, "rewards_train/2-w": -1.7775249481201172, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.536370277404785, "rewards_train/margins_1": 4.351463794708252, "rewards_train/margins_2": 4.633025169372559, "step": 417 }, { "epoch": 1.25, "logps_train/policy_1_2": -189.5760040283203, "logps_train/policy_1_l": -210.9153289794922, "logps_train/policy_1_w": -133.14425659179688, "logps_train/policy_2_2": -143.41281127929688, "logps_train/policy_2_w": -193.60760498046875, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": 0.1674002856016159, "rewards_train/1-l": -3.033721446990967, "rewards_train/1-w": 3.4414329528808594, "rewards_train/2-2": 3.425516366958618, "rewards_train/2-w": -0.31544870138168335, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.475154399871826, "rewards_train/margins_1": 3.2740326672792435, "rewards_train/margins_2": 3.7409650683403015, "step": 417 }, { "epoch": 1.25, "learning_rate": 1.6993253653429064e-06, "loss": 0.6211, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -136.76077270507812, "logps_train/policy_1_l": -123.89405822753906, "logps_train/policy_1_w": -131.82461547851562, "logps_train/policy_2_2": -102.021728515625, "logps_train/policy_2_w": -184.43331909179688, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": 0.10946941375732422, "rewards_train/1-l": -1.7220715284347534, "rewards_train/1-w": 2.8382413387298584, "rewards_train/2-2": 2.3394289016723633, "rewards_train/2-w": -0.5726292133331299, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.560312867164612, "rewards_train/margins_1": 2.728771924972534, "rewards_train/margins_2": 2.912058115005493, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -117.63511657714844, "logps_train/policy_1_l": -158.094482421875, "logps_train/policy_1_w": -91.83555603027344, "logps_train/policy_2_2": -78.15140533447266, "logps_train/policy_2_w": -142.04991149902344, "logps_train/ref_1_2": -108.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -0.9904652833938599, "rewards_train/1-l": -2.3286869525909424, "rewards_train/1-w": 2.1083147525787354, "rewards_train/2-2": 1.572359323501587, "rewards_train/2-w": -1.1334086656570435, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.437001705169678, "rewards_train/margins_1": 3.098780035972595, "rewards_train/margins_2": 2.7057679891586304, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -232.16119384765625, "logps_train/policy_1_l": -207.8214111328125, "logps_train/policy_1_w": -142.77926635742188, "logps_train/policy_2_2": -140.64959716796875, "logps_train/policy_2_w": -235.04969787597656, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -2.8743209838867188, "rewards_train/1-l": -2.1617300510406494, "rewards_train/1-w": 2.791995048522949, "rewards_train/2-2": 2.9293770790100098, "rewards_train/2-w": -3.0188374519348145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.953725099563599, "rewards_train/margins_1": 5.666316032409668, "rewards_train/margins_2": 5.948214530944824, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -253.34249877929688, "logps_train/policy_1_l": -258.60211181640625, "logps_train/policy_1_w": -122.62631225585938, "logps_train/policy_2_2": -152.50445556640625, "logps_train/policy_2_w": -203.56283569335938, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -225.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -2.6123757362365723, "rewards_train/1-l": -3.3207578659057617, "rewards_train/1-w": 3.4584622383117676, "rewards_train/2-2": 3.6099061965942383, "rewards_train/2-w": -1.5437840223312378, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.779220104217529, "rewards_train/margins_1": 6.07083797454834, "rewards_train/margins_2": 5.153690218925476, "step": 418 }, { "epoch": 1.25, "logps_train/policy_1_2": -180.87803649902344, "logps_train/policy_1_l": -157.35015869140625, "logps_train/policy_1_w": -88.2271728515625, "logps_train/policy_2_2": -113.39237976074219, "logps_train/policy_2_w": -159.1337890625, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -2.052647590637207, "rewards_train/1-l": -2.469783306121826, "rewards_train/1-w": 3.005603551864624, "rewards_train/2-2": 2.4283158779144287, "rewards_train/2-w": -1.4129884243011475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.47538685798645, "rewards_train/margins_1": 5.058251142501831, "rewards_train/margins_2": 3.841304302215576, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -142.66348266601562, "logps_train/policy_1_l": -123.75137329101562, "logps_train/policy_1_w": -89.21604919433594, "logps_train/policy_2_2": -94.17858123779297, "logps_train/policy_2_w": -138.16644287109375, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -0.8427150249481201, "rewards_train/1-l": -1.8494296073913574, "rewards_train/1-w": 2.3428964614868164, "rewards_train/2-2": 2.309925079345703, "rewards_train/2-w": -1.1051692962646484, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.192326068878174, "rewards_train/margins_1": 3.1856114864349365, "rewards_train/margins_2": 3.4150943756103516, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -238.0109405517578, "logps_train/policy_1_l": -230.97454833984375, "logps_train/policy_1_w": -139.4878387451172, "logps_train/policy_2_2": -158.21531677246094, "logps_train/policy_2_w": -220.63800048828125, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.5998307466506958, "rewards_train/1-l": -2.9055674076080322, "rewards_train/1-w": 3.834028720855713, "rewards_train/2-2": 3.396907329559326, "rewards_train/2-w": -0.6497375965118408, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.739596128463745, "rewards_train/margins_1": 5.433859467506409, "rewards_train/margins_2": 4.046644926071167, "step": 419 }, { "epoch": 1.25, "logps_train/policy_1_2": -154.54995727539062, "logps_train/policy_1_l": -165.34930419921875, "logps_train/policy_1_w": -110.90408325195312, "logps_train/policy_2_2": -101.05274963378906, "logps_train/policy_2_w": -171.24099731445312, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -0.5221828818321228, "rewards_train/1-l": -2.084929943084717, "rewards_train/1-w": 2.6486546993255615, "rewards_train/2-2": 2.678318738937378, "rewards_train/2-w": -1.0951930284500122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.733584642410278, "rewards_train/margins_1": 3.1708375811576843, "rewards_train/margins_2": 3.77351176738739, "step": 419 }, { "epoch": 1.26, "learning_rate": 1.6759676993274805e-06, "loss": 0.6423, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -146.4456024169922, "logps_train/policy_1_l": -127.82320404052734, "logps_train/policy_1_w": -78.89317321777344, "logps_train/policy_2_2": -81.48313903808594, "logps_train/policy_2_w": -134.68829345703125, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -106.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -1.5312790870666504, "rewards_train/1-l": -1.6507070064544678, "rewards_train/1-w": 2.7124123573303223, "rewards_train/2-2": 2.4359636306762695, "rewards_train/2-w": -0.9126288890838623, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.36311936378479, "rewards_train/margins_1": 4.243691444396973, "rewards_train/margins_2": 3.348592519760132, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -122.76251983642578, "logps_train/policy_1_l": -172.4445343017578, "logps_train/policy_1_w": -90.09840393066406, "logps_train/policy_2_2": -79.36967468261719, "logps_train/policy_2_w": -132.59747314453125, "logps_train/ref_1_2": -117.5, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -0.5065252184867859, "rewards_train/1-l": -2.4446964263916016, "rewards_train/1-w": 2.503002166748047, "rewards_train/2-2": 2.4538536071777344, "rewards_train/2-w": -0.09451194852590561, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.947698593139648, "rewards_train/margins_1": 3.0095273852348328, "rewards_train/margins_2": 2.54836555570364, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -230.0281982421875, "logps_train/policy_1_l": -212.12472534179688, "logps_train/policy_1_w": -139.5388641357422, "logps_train/policy_2_2": -146.04962158203125, "logps_train/policy_2_w": -213.94070434570312, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": -2.2343647480010986, "rewards_train/1-l": -2.25915265083313, "rewards_train/1-w": 3.4883012771606445, "rewards_train/2-2": 3.379120349884033, "rewards_train/2-w": -1.3050084114074707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.747453927993774, "rewards_train/margins_1": 5.722666025161743, "rewards_train/margins_2": 4.684128761291504, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -193.88555908203125, "logps_train/policy_1_l": -208.64686584472656, "logps_train/policy_1_w": -120.76163482666016, "logps_train/policy_2_2": -133.40615844726562, "logps_train/policy_2_w": -185.15736389160156, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.2954907417297363, "rewards_train/1-l": -2.5341198444366455, "rewards_train/1-w": 2.604696035385132, "rewards_train/2-2": 2.7008872032165527, "rewards_train/2-w": -1.0860481262207031, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.138815879821777, "rewards_train/margins_1": 3.900186777114868, "rewards_train/margins_2": 3.786935329437256, "step": 420 }, { "epoch": 1.26, "logps_train/policy_1_2": -188.6539306640625, "logps_train/policy_1_l": -171.40011596679688, "logps_train/policy_1_w": -131.13705444335938, "logps_train/policy_2_2": -126.48368072509766, "logps_train/policy_2_w": -217.38815307617188, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.1646113395690918, "rewards_train/1-l": -1.9204797744750977, "rewards_train/1-w": 3.602700710296631, "rewards_train/2-2": 2.8805389404296875, "rewards_train/2-w": -2.074751853942871, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.5231804847717285, "rewards_train/margins_1": 4.767312049865723, "rewards_train/margins_2": 4.955290794372559, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -85.65980529785156, "logps_train/policy_1_l": -106.57063293457031, "logps_train/policy_1_w": -52.12529754638672, "logps_train/policy_2_2": -51.04154968261719, "logps_train/policy_2_w": -90.65499877929688, "logps_train/ref_1_2": -74.0, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -67.0, "logps_train/ref_2_2": -62.25, "logps_train/ref_2_w": -79.5, "rewards_train/1-2": -1.1663705110549927, "rewards_train/1-l": -1.8714120388031006, "rewards_train/1-w": 1.5030709505081177, "rewards_train/2-2": 1.1194777488708496, "rewards_train/2-w": -1.1159883737564087, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.3744829893112183, "rewards_train/margins_1": 2.6694414615631104, "rewards_train/margins_2": 2.2354661226272583, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -197.72463989257812, "logps_train/policy_1_l": -168.4869384765625, "logps_train/policy_1_w": -133.17601013183594, "logps_train/policy_2_2": -125.08993530273438, "logps_train/policy_2_w": -215.75746154785156, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.605471134185791, "rewards_train/1-l": -2.2438108921051025, "rewards_train/1-w": 3.180885076522827, "rewards_train/2-2": 2.897745370864868, "rewards_train/2-w": -1.9167360067367554, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.42469596862793, "rewards_train/margins_1": 4.786356210708618, "rewards_train/margins_2": 4.8144813776016235, "step": 421 }, { "epoch": 1.26, "logps_train/policy_1_2": -235.50222778320312, "logps_train/policy_1_l": -174.74615478515625, "logps_train/policy_1_w": -164.1525421142578, "logps_train/policy_2_2": -162.97898864746094, "logps_train/policy_2_w": -245.66079711914062, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -225.0, "rewards_train/1-2": -2.0838170051574707, "rewards_train/1-l": -2.07481050491333, "rewards_train/1-w": 3.630058526992798, "rewards_train/2-2": 2.8278822898864746, "rewards_train/2-w": -2.067641496658325, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.704869031906128, "rewards_train/margins_1": 5.7138755321502686, "rewards_train/margins_2": 4.8955237865448, "step": 421 }, { "epoch": 1.26, "learning_rate": 1.6526904574119213e-06, "loss": 0.7213, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -179.1871795654297, "logps_train/policy_1_l": -130.58734130859375, "logps_train/policy_1_w": -109.36216735839844, "logps_train/policy_2_2": -121.61825561523438, "logps_train/policy_2_w": -163.90147399902344, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -0.7665681838989258, "rewards_train/1-l": -1.6778748035430908, "rewards_train/1-w": 3.058314800262451, "rewards_train/2-2": 3.2411041259765625, "rewards_train/2-w": -0.8393654227256775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.736189603805542, "rewards_train/margins_1": 3.824882984161377, "rewards_train/margins_2": 4.08046954870224, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -153.4834442138672, "logps_train/policy_1_l": -112.31011199951172, "logps_train/policy_1_w": -107.58696746826172, "logps_train/policy_2_2": -92.84754943847656, "logps_train/policy_2_w": -176.31216430664062, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.5002977848052979, "rewards_train/1-l": -1.4871639013290405, "rewards_train/1-w": 3.0768990516662598, "rewards_train/2-2": 2.3812613487243652, "rewards_train/2-w": -1.4265284538269043, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.5640629529953, "rewards_train/margins_1": 4.577196836471558, "rewards_train/margins_2": 3.8077898025512695, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -157.45831298828125, "logps_train/policy_1_l": -114.5489273071289, "logps_train/policy_1_w": -104.90352630615234, "logps_train/policy_2_2": -94.2412338256836, "logps_train/policy_2_w": -185.6873016357422, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -96.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.647589921951294, "rewards_train/1-l": -1.8508886098861694, "rewards_train/1-w": 3.0391387939453125, "rewards_train/2-2": 2.4076273441314697, "rewards_train/2-w": -2.3404946327209473, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.890027403831482, "rewards_train/margins_1": 4.6867287158966064, "rewards_train/margins_2": 4.748121976852417, "step": 422 }, { "epoch": 1.26, "logps_train/policy_1_2": -111.55389404296875, "logps_train/policy_1_l": -133.911865234375, "logps_train/policy_1_w": -69.34879302978516, "logps_train/policy_2_2": -62.9984245300293, "logps_train/policy_2_w": -125.52610778808594, "logps_train/ref_1_2": -100.5, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": -1.1046080589294434, "rewards_train/1-l": -2.1555418968200684, "rewards_train/1-w": 2.2362148761749268, "rewards_train/2-2": 2.1724228858947754, "rewards_train/2-w": -1.2742902040481567, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.391756772994995, "rewards_train/margins_1": 3.34082293510437, "rewards_train/margins_2": 3.446713089942932, "step": 422 }, { "epoch": 1.27, "logps_train/policy_1_2": -135.11282348632812, "logps_train/policy_1_l": -169.99484252929688, "logps_train/policy_1_w": -118.86262512207031, "logps_train/policy_2_2": -89.55186462402344, "logps_train/policy_2_w": -170.24940490722656, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -0.8694857358932495, "rewards_train/1-l": -2.24206280708313, "rewards_train/1-w": 2.6660823822021484, "rewards_train/2-2": 2.019031524658203, "rewards_train/2-w": -0.7460342645645142, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.908145189285278, "rewards_train/margins_1": 3.535568118095398, "rewards_train/margins_2": 2.7650657892227173, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -111.80982971191406, "logps_train/policy_1_l": -139.23733520507812, "logps_train/policy_1_w": -73.6261215209961, "logps_train/policy_2_2": -70.2852783203125, "logps_train/policy_2_w": -121.57255554199219, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -88.5, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": -1.2219982147216797, "rewards_train/1-l": -1.9605509042739868, "rewards_train/1-w": 2.138169288635254, "rewards_train/2-2": 1.8132203817367554, "rewards_train/2-w": -1.1617474555969238, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.098720192909241, "rewards_train/margins_1": 3.3601675033569336, "rewards_train/margins_2": 2.974967837333679, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -146.13246154785156, "logps_train/policy_1_l": -187.49002075195312, "logps_train/policy_1_w": -129.11361694335938, "logps_train/policy_2_2": -94.45088195800781, "logps_train/policy_2_w": -188.07400512695312, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.9909799695014954, "rewards_train/1-l": -2.504765033721924, "rewards_train/1-w": 2.6357457637786865, "rewards_train/2-2": 2.4002246856689453, "rewards_train/2-w": -1.26169753074646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.14051079750061, "rewards_train/margins_1": 3.626725733280182, "rewards_train/margins_2": 3.6619222164154053, "step": 423 }, { "epoch": 1.27, "logps_train/policy_1_2": -160.84910583496094, "logps_train/policy_1_l": -176.22219848632812, "logps_train/policy_1_w": -163.11119079589844, "logps_train/policy_2_2": -100.44393157958984, "logps_train/policy_2_w": -238.68820190429688, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -1.0200666189193726, "rewards_train/1-l": -2.546586513519287, "rewards_train/1-w": 3.6583139896392822, "rewards_train/2-2": 2.9892008304595947, "rewards_train/2-w": -1.6385451555252075, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.204900503158569, "rewards_train/margins_1": 4.678380608558655, "rewards_train/margins_2": 4.627745985984802, "step": 423 }, { "epoch": 1.27, "learning_rate": 1.6294959114140036e-06, "loss": 0.6341, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -192.31112670898438, "logps_train/policy_1_l": -171.2418670654297, "logps_train/policy_1_w": -128.21258544921875, "logps_train/policy_2_2": -119.32337188720703, "logps_train/policy_2_w": -214.7683868408203, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.913925051689148, "rewards_train/1-l": -1.9845385551452637, "rewards_train/1-w": 3.3310842514038086, "rewards_train/2-2": 2.7846546173095703, "rewards_train/2-w": -2.3520333766937256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.315622806549072, "rewards_train/margins_1": 5.2450093030929565, "rewards_train/margins_2": 5.136687994003296, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -178.59286499023438, "logps_train/policy_1_l": -93.05171966552734, "logps_train/policy_1_w": -57.6197624206543, "logps_train/policy_2_2": -106.99005126953125, "logps_train/policy_2_w": -104.42792510986328, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -75.5, "logps_train/ref_1_w": -72.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": -2.5503015518188477, "rewards_train/1-l": -1.776827096939087, "rewards_train/1-w": 1.4364612102508545, "rewards_train/2-2": 2.262908935546875, "rewards_train/2-w": -1.5041204690933228, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.2132883071899414, "rewards_train/margins_1": 3.986762762069702, "rewards_train/margins_2": 3.7670294046401978, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -195.5179443359375, "logps_train/policy_1_l": -210.49594116210938, "logps_train/policy_1_w": -129.31890869140625, "logps_train/policy_2_2": -134.47100830078125, "logps_train/policy_2_w": -186.29249572753906, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.0152703523635864, "rewards_train/1-l": -2.9592621326446533, "rewards_train/1-w": 2.800726890563965, "rewards_train/2-2": 2.691181182861328, "rewards_train/2-w": -0.6890157461166382, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.759989023208618, "rewards_train/margins_1": 3.8159972429275513, "rewards_train/margins_2": 3.3801969289779663, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -136.31451416015625, "logps_train/policy_1_l": -134.52764892578125, "logps_train/policy_1_w": -90.8870849609375, "logps_train/policy_2_2": -83.83274841308594, "logps_train/policy_2_w": -138.63592529296875, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -112.5, "logps_train/ref_2_2": -106.5, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -1.16934072971344, "rewards_train/1-l": -1.6331846714019775, "rewards_train/1-w": 2.1568474769592285, "rewards_train/2-2": 2.244264602661133, "rewards_train/2-w": -1.1007870435714722, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.790032148361206, "rewards_train/margins_1": 3.3261882066726685, "rewards_train/margins_2": 3.345051646232605, "step": 424 }, { "epoch": 1.27, "logps_train/policy_1_2": -153.79544067382812, "logps_train/policy_1_l": -138.23175048828125, "logps_train/policy_1_w": -95.62564086914062, "logps_train/policy_2_2": -98.05540466308594, "logps_train/policy_2_w": -163.85496520996094, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.8320823907852173, "rewards_train/1-l": -1.9235410690307617, "rewards_train/1-w": 2.765268087387085, "rewards_train/2-2": 2.5491466522216797, "rewards_train/2-w": -1.6232901811599731, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.688809156417847, "rewards_train/margins_1": 3.5973504781723022, "rewards_train/margins_2": 4.172436833381653, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -256.017822265625, "logps_train/policy_1_l": -228.8398895263672, "logps_train/policy_1_w": -150.15475463867188, "logps_train/policy_2_2": -154.63461303710938, "logps_train/policy_2_w": -235.41099548339844, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": -2.4799070358276367, "rewards_train/1-l": -3.253617763519287, "rewards_train/1-w": 3.9767136573791504, "rewards_train/2-2": 4.185756683349609, "rewards_train/2-w": -1.2364122867584229, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 7.2303314208984375, "rewards_train/margins_1": 6.456620693206787, "rewards_train/margins_2": 5.422168970108032, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -210.208740234375, "logps_train/policy_1_l": -147.1476593017578, "logps_train/policy_1_w": -132.56578063964844, "logps_train/policy_2_2": -134.0111541748047, "logps_train/policy_2_w": -210.77337646484375, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.8274673223495483, "rewards_train/1-l": -1.5257034301757812, "rewards_train/1-w": 3.4168591499328613, "rewards_train/2-2": 3.1427574157714844, "rewards_train/2-w": -1.962493896484375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.942562580108643, "rewards_train/margins_1": 5.24432647228241, "rewards_train/margins_2": 5.105251312255859, "step": 425 }, { "epoch": 1.27, "logps_train/policy_1_2": -169.33566284179688, "logps_train/policy_1_l": -170.2896728515625, "logps_train/policy_1_w": -114.20922088623047, "logps_train/policy_2_2": -114.8980712890625, "logps_train/policy_2_w": -173.97378540039062, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.4101276397705078, "rewards_train/1-l": -2.3478760719299316, "rewards_train/1-w": 2.5325934886932373, "rewards_train/2-2": 2.47894287109375, "rewards_train/2-w": -1.7081224918365479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.880469560623169, "rewards_train/margins_1": 3.942721128463745, "rewards_train/margins_2": 4.187065362930298, "step": 425 }, { "epoch": 1.28, "learning_rate": 1.6063863250805279e-06, "loss": 0.605, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -188.78182983398438, "logps_train/policy_1_l": -196.43685913085938, "logps_train/policy_1_w": -140.28582763671875, "logps_train/policy_2_2": -125.77196502685547, "logps_train/policy_2_w": -207.46051025390625, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.1350181102752686, "rewards_train/1-l": -2.2946624755859375, "rewards_train/1-w": 3.358916997909546, "rewards_train/2-2": 2.9985852241516113, "rewards_train/2-w": -0.5796446204185486, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.653579473495483, "rewards_train/margins_1": 4.4939351081848145, "rewards_train/margins_2": 3.57822984457016, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -166.94346618652344, "logps_train/policy_1_l": -166.88475036621094, "logps_train/policy_1_w": -100.86756896972656, "logps_train/policy_2_2": -108.72297668457031, "logps_train/policy_2_w": -149.39138793945312, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.2869243621826172, "rewards_train/1-l": -3.0110092163085938, "rewards_train/1-w": 2.310117721557617, "rewards_train/2-2": 2.7144217491149902, "rewards_train/2-w": -0.9703898429870605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.321126937866211, "rewards_train/margins_1": 3.5970420837402344, "rewards_train/margins_2": 3.684811592102051, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -198.35794067382812, "logps_train/policy_1_l": -188.75399780273438, "logps_train/policy_1_w": -123.3719711303711, "logps_train/policy_2_2": -125.27445983886719, "logps_train/policy_2_w": -196.379150390625, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.5615739822387695, "rewards_train/1-l": -2.996617317199707, "rewards_train/1-w": 2.9333112239837646, "rewards_train/2-2": 3.354194164276123, "rewards_train/2-w": -1.432055950164795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.929928541183472, "rewards_train/margins_1": 4.494885206222534, "rewards_train/margins_2": 4.786250114440918, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -175.44586181640625, "logps_train/policy_1_l": -206.456298828125, "logps_train/policy_1_w": -145.79910278320312, "logps_train/policy_2_2": -115.5381851196289, "logps_train/policy_2_w": -212.025146484375, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.2492740154266357, "rewards_train/1-l": -2.5158443450927734, "rewards_train/1-w": 2.7052464485168457, "rewards_train/2-2": 2.5860257148742676, "rewards_train/2-w": -1.4056391716003418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.221090793609619, "rewards_train/margins_1": 3.9545204639434814, "rewards_train/margins_2": 3.9916648864746094, "step": 426 }, { "epoch": 1.28, "logps_train/policy_1_2": -206.4908905029297, "logps_train/policy_1_l": -191.72732543945312, "logps_train/policy_1_w": -131.0530548095703, "logps_train/policy_2_2": -125.51708221435547, "logps_train/policy_2_w": -235.86376953125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -2.313736915588379, "rewards_train/1-l": -2.964845657348633, "rewards_train/1-w": 3.6121509075164795, "rewards_train/2-2": 3.104736566543579, "rewards_train/2-w": -3.1751456260681152, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.576996564865112, "rewards_train/margins_1": 5.925887823104858, "rewards_train/margins_2": 6.279882192611694, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -255.2099151611328, "logps_train/policy_1_l": -223.51951599121094, "logps_train/policy_1_w": -140.71861267089844, "logps_train/policy_2_2": -151.16259765625, "logps_train/policy_2_w": -258.39117431640625, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": -2.9325146675109863, "rewards_train/1-l": -2.334519386291504, "rewards_train/1-w": 3.705091953277588, "rewards_train/2-2": 3.769871950149536, "rewards_train/2-w": -3.086775302886963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.039611339569092, "rewards_train/margins_1": 6.637606620788574, "rewards_train/margins_2": 6.856647253036499, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -198.66957092285156, "logps_train/policy_1_l": -187.10653686523438, "logps_train/policy_1_w": -134.6370086669922, "logps_train/policy_2_2": -134.3120574951172, "logps_train/policy_2_w": -210.27850341796875, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -1.0150045156478882, "rewards_train/1-l": -2.9048197269439697, "rewards_train/1-w": 3.205634355545044, "rewards_train/2-2": 3.391451120376587, "rewards_train/2-w": -1.7966008186340332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.110454082489014, "rewards_train/margins_1": 4.220638871192932, "rewards_train/margins_2": 5.18805193901062, "step": 427 }, { "epoch": 1.28, "logps_train/policy_1_2": -213.80609130859375, "logps_train/policy_1_l": -176.14724731445312, "logps_train/policy_1_w": -113.78524017333984, "logps_train/policy_2_2": -149.24551391601562, "logps_train/policy_2_w": -171.95986938476562, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.529437780380249, "rewards_train/1-l": -2.4973416328430176, "rewards_train/1-w": 2.441495418548584, "rewards_train/2-2": 2.9131431579589844, "rewards_train/2-w": -1.2727450132369995, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.938837051391602, "rewards_train/margins_1": 3.970933198928833, "rewards_train/margins_2": 4.185888171195984, "step": 427 }, { "epoch": 1.28, "learning_rate": 1.58336395386638e-06, "loss": 0.5342, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -148.06497192382812, "logps_train/policy_1_l": -133.34820556640625, "logps_train/policy_1_w": -86.82333374023438, "logps_train/policy_2_2": -90.47177124023438, "logps_train/policy_2_w": -139.66653442382812, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -1.1795446872711182, "rewards_train/1-l": -2.246342897415161, "rewards_train/1-w": 2.4467687606811523, "rewards_train/2-2": 2.5266506671905518, "rewards_train/2-w": -0.8666545152664185, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.6931116580963135, "rewards_train/margins_1": 3.6263134479522705, "rewards_train/margins_2": 3.39330518245697, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -187.08200073242188, "logps_train/policy_1_l": -209.8038330078125, "logps_train/policy_1_w": -86.67047119140625, "logps_train/policy_2_2": -121.21633911132812, "logps_train/policy_2_w": -131.74240112304688, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -1.6601526737213135, "rewards_train/1-l": -2.399132490158081, "rewards_train/1-w": 2.522015333175659, "rewards_train/2-2": 2.606003761291504, "rewards_train/2-w": -0.6625199317932129, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.92114782333374, "rewards_train/margins_1": 4.182168006896973, "rewards_train/margins_2": 3.268523693084717, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -115.46640014648438, "logps_train/policy_1_l": -133.86605834960938, "logps_train/policy_1_w": -76.60235595703125, "logps_train/policy_2_2": -74.14947509765625, "logps_train/policy_2_w": -126.12074279785156, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": -1.1692960262298584, "rewards_train/1-l": -1.9889249801635742, "rewards_train/1-w": 2.052654981613159, "rewards_train/2-2": 1.8041927814483643, "rewards_train/2-w": -1.1634411811828613, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.041579961776733, "rewards_train/margins_1": 3.2219510078430176, "rewards_train/margins_2": 2.9676339626312256, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -129.2402801513672, "logps_train/policy_1_l": -132.529296875, "logps_train/policy_1_w": -102.82254028320312, "logps_train/policy_2_2": -76.39019012451172, "logps_train/policy_2_w": -168.5301055908203, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.6185588836669922, "rewards_train/1-l": -1.9512214660644531, "rewards_train/1-w": 2.001534938812256, "rewards_train/2-2": 1.5654728412628174, "rewards_train/2-w": -2.104182004928589, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.952756404876709, "rewards_train/margins_1": 3.620093822479248, "rewards_train/margins_2": 3.6696548461914062, "step": 428 }, { "epoch": 1.28, "logps_train/policy_1_2": -198.282470703125, "logps_train/policy_1_l": -219.20553588867188, "logps_train/policy_1_w": -165.48251342773438, "logps_train/policy_2_2": -133.22256469726562, "logps_train/policy_2_w": -256.51092529296875, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": -1.2524654865264893, "rewards_train/1-l": -2.7348103523254395, "rewards_train/1-w": 4.272843360900879, "rewards_train/2-2": 3.0910253524780273, "rewards_train/2-w": -1.8167181015014648, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.007653713226318, "rewards_train/margins_1": 5.525308847427368, "rewards_train/margins_2": 4.907743453979492, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -138.38845825195312, "logps_train/policy_1_l": -96.77554321289062, "logps_train/policy_1_w": -82.01689910888672, "logps_train/policy_2_2": -85.53875732421875, "logps_train/policy_2_w": -121.80287170410156, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -81.5, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -118.0, "rewards_train/1-2": -1.2153593301773071, "rewards_train/1-l": -1.5131256580352783, "rewards_train/1-w": 2.226020336151123, "rewards_train/2-2": 2.390704870223999, "rewards_train/2-w": -0.37528252601623535, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.6875, "rewards_train/margins": 3.7391459941864014, "rewards_train/margins_1": 3.44137966632843, "rewards_train/margins_2": 2.7659873962402344, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -228.97959899902344, "logps_train/policy_1_l": -163.24339294433594, "logps_train/policy_1_w": -125.07949829101562, "logps_train/policy_2_2": -151.28903198242188, "logps_train/policy_2_w": -207.98416137695312, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -2.4159281253814697, "rewards_train/1-l": -1.9526132345199585, "rewards_train/1-w": 3.053476095199585, "rewards_train/2-2": 2.9121124744415283, "rewards_train/2-w": -1.9960732460021973, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.0060893297195435, "rewards_train/margins_1": 5.469404220581055, "rewards_train/margins_2": 4.908185720443726, "step": 429 }, { "epoch": 1.28, "logps_train/policy_1_2": -129.97207641601562, "logps_train/policy_1_l": -150.979248046875, "logps_train/policy_1_w": -78.26763916015625, "logps_train/policy_2_2": -79.79328918457031, "logps_train/policy_2_w": -126.53741455078125, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -1.4850006103515625, "rewards_train/1-l": -2.2693114280700684, "rewards_train/1-w": 1.9632755517959595, "rewards_train/2-2": 1.9415696859359741, "rewards_train/2-w": -1.1617491245269775, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.232586979866028, "rewards_train/margins_1": 3.448276162147522, "rewards_train/margins_2": 3.1033188104629517, "step": 429 }, { "epoch": 1.29, "learning_rate": 1.5604310447144052e-06, "loss": 0.6907, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -152.9289093017578, "logps_train/policy_1_l": -135.17300415039062, "logps_train/policy_1_w": -128.09115600585938, "logps_train/policy_2_2": -94.89275360107422, "logps_train/policy_2_w": -201.69424438476562, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.0882034301757812, "rewards_train/1-l": -2.0890774726867676, "rewards_train/1-w": 3.3143224716186523, "rewards_train/2-2": 2.8404126167297363, "rewards_train/2-w": -1.8153233528137207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.40339994430542, "rewards_train/margins_1": 4.402525901794434, "rewards_train/margins_2": 4.655735969543457, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -173.47393798828125, "logps_train/policy_1_l": -179.1458282470703, "logps_train/policy_1_w": -112.27903747558594, "logps_train/policy_2_2": -110.37691497802734, "logps_train/policy_2_w": -190.4290771484375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.221612572669983, "rewards_train/1-l": -1.684798240661621, "rewards_train/1-w": 2.9550065994262695, "rewards_train/2-2": 2.7257845401763916, "rewards_train/2-w": -1.693884015083313, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.639804840087891, "rewards_train/margins_1": 4.176619172096252, "rewards_train/margins_2": 4.419668555259705, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -165.30514526367188, "logps_train/policy_1_l": -154.131591796875, "logps_train/policy_1_w": -119.10865783691406, "logps_train/policy_2_2": -106.4075927734375, "logps_train/policy_2_w": -196.1432647705078, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.335201382637024, "rewards_train/1-l": -2.220484733581543, "rewards_train/1-w": 3.4613990783691406, "rewards_train/2-2": 2.7901976108551025, "rewards_train/2-w": -1.9807326793670654, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.681883811950684, "rewards_train/margins_1": 4.7966004610061646, "rewards_train/margins_2": 4.770930290222168, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -134.99269104003906, "logps_train/policy_1_l": -153.24298095703125, "logps_train/policy_1_w": -135.18228149414062, "logps_train/policy_2_2": -82.52534484863281, "logps_train/policy_2_w": -217.49813842773438, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.2605974674224854, "rewards_train/1-l": -2.076251983642578, "rewards_train/1-w": 3.654428005218506, "rewards_train/2-2": 2.1295950412750244, "rewards_train/2-w": -2.217780590057373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.730679988861084, "rewards_train/margins_1": 4.915025472640991, "rewards_train/margins_2": 4.3473756313323975, "step": 430 }, { "epoch": 1.29, "logps_train/policy_1_2": -219.38697814941406, "logps_train/policy_1_l": -184.7139892578125, "logps_train/policy_1_w": -107.15113067626953, "logps_train/policy_2_2": -135.09181213378906, "logps_train/policy_2_w": -178.9088134765625, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -2.6715102195739746, "rewards_train/1-l": -2.1368298530578613, "rewards_train/1-w": 2.9555904865264893, "rewards_train/2-2": 3.0001935958862305, "rewards_train/2-w": -1.7393183708190918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.092420339584351, "rewards_train/margins_1": 5.627100706100464, "rewards_train/margins_2": 4.739511966705322, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -184.2877197265625, "logps_train/policy_1_l": -180.46896362304688, "logps_train/policy_1_w": -107.78886413574219, "logps_train/policy_2_2": -116.77070617675781, "logps_train/policy_2_w": -163.82168579101562, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.7733041048049927, "rewards_train/1-l": -2.5931849479675293, "rewards_train/1-w": 2.4116897583007812, "rewards_train/2-2": 2.583085775375366, "rewards_train/2-w": -1.080678939819336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.0048747062683105, "rewards_train/margins_1": 4.184993863105774, "rewards_train/margins_2": 3.663764715194702, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -139.44166564941406, "logps_train/policy_1_l": -158.0893096923828, "logps_train/policy_1_w": -119.0597915649414, "logps_train/policy_2_2": -94.249755859375, "logps_train/policy_2_w": -179.76539611816406, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -0.8506114482879639, "rewards_train/1-l": -2.3649849891662598, "rewards_train/1-w": 2.7252707481384277, "rewards_train/2-2": 2.1506099700927734, "rewards_train/2-w": -1.461500644683838, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.0902557373046875, "rewards_train/margins_1": 3.5758821964263916, "rewards_train/margins_2": 3.6121106147766113, "step": 431 }, { "epoch": 1.29, "logps_train/policy_1_2": -204.3300018310547, "logps_train/policy_1_l": -201.43170166015625, "logps_train/policy_1_w": -121.19754791259766, "logps_train/policy_2_2": -135.13778686523438, "logps_train/policy_2_w": -182.8582000732422, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.8068281412124634, "rewards_train/1-l": -2.9956107139587402, "rewards_train/1-w": 2.5628132820129395, "rewards_train/2-2": 2.7955965995788574, "rewards_train/2-w": -1.272538661956787, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.55842399597168, "rewards_train/margins_1": 4.369641423225403, "rewards_train/margins_2": 4.0681352615356445, "step": 431 }, { "epoch": 1.29, "learning_rate": 1.5375898358361079e-06, "loss": 0.5774, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -188.2144775390625, "logps_train/policy_1_l": -169.90121459960938, "logps_train/policy_1_w": -138.1573486328125, "logps_train/policy_2_2": -124.21273803710938, "logps_train/policy_2_w": -202.19956970214844, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -0.9941034913063049, "rewards_train/1-l": -2.606576919555664, "rewards_train/1-w": 3.9044809341430664, "rewards_train/2-2": 3.2627103328704834, "rewards_train/2-w": -0.36487817764282227, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.5110578536987305, "rewards_train/margins_1": 4.898584425449371, "rewards_train/margins_2": 3.6275885105133057, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -151.87843322753906, "logps_train/policy_1_l": -179.21749877929688, "logps_train/policy_1_w": -133.64752197265625, "logps_train/policy_2_2": -104.87200927734375, "logps_train/policy_2_w": -191.07000732421875, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -0.812842845916748, "rewards_train/1-l": -2.30700421333313, "rewards_train/1-w": 2.8031671047210693, "rewards_train/2-2": 2.24473237991333, "rewards_train/2-w": -1.1007505655288696, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.110171318054199, "rewards_train/margins_1": 3.6160099506378174, "rewards_train/margins_2": 3.3454829454421997, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -135.86276245117188, "logps_train/policy_1_l": -110.91028594970703, "logps_train/policy_1_w": -65.04158020019531, "logps_train/policy_2_2": -81.98522186279297, "logps_train/policy_2_w": -117.49185180664062, "logps_train/ref_1_2": -119.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -102.5, "rewards_train/1-2": -1.6780726909637451, "rewards_train/1-l": -1.7211060523986816, "rewards_train/1-w": 1.9884687662124634, "rewards_train/2-2": 2.0807745456695557, "rewards_train/2-w": -1.5220369100570679, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.709574818611145, "rewards_train/margins_1": 3.6665414571762085, "rewards_train/margins_2": 3.6028114557266235, "step": 432 }, { "epoch": 1.29, "logps_train/policy_1_2": -116.14297485351562, "logps_train/policy_1_l": -108.899658203125, "logps_train/policy_1_w": -54.09691619873047, "logps_train/policy_2_2": -69.96611022949219, "logps_train/policy_2_w": -96.6313705444336, "logps_train/ref_1_2": -107.0, "logps_train/ref_1_l": -87.5, "logps_train/ref_1_w": -73.5, "logps_train/ref_2_2": -90.5, "logps_train/ref_2_w": -89.5, "rewards_train/1-2": -0.9291417002677917, "rewards_train/1-l": -2.131908893585205, "rewards_train/1-w": 1.9280036687850952, "rewards_train/2-2": 2.074091911315918, "rewards_train/2-w": -0.7176291942596436, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.0599125623703, "rewards_train/margins_1": 2.857145369052887, "rewards_train/margins_2": 2.7917211055755615, "step": 432 }, { "epoch": 1.3, "logps_train/policy_1_2": -202.66015625, "logps_train/policy_1_l": -152.06158447265625, "logps_train/policy_1_w": -110.58741760253906, "logps_train/policy_2_2": -129.53265380859375, "logps_train/policy_2_w": -177.1258087158203, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -2.1324236392974854, "rewards_train/1-l": -1.856743574142456, "rewards_train/1-w": 2.941941499710083, "rewards_train/2-2": 2.83189058303833, "rewards_train/2-w": -1.4200031757354736, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.798685073852539, "rewards_train/margins_1": 5.074365139007568, "rewards_train/margins_2": 4.251893758773804, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -183.96475219726562, "logps_train/policy_1_l": -162.22836303710938, "logps_train/policy_1_w": -109.25323486328125, "logps_train/policy_2_2": -130.83609008789062, "logps_train/policy_2_w": -173.61459350585938, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.7206939458847046, "rewards_train/1-l": -2.3492026329040527, "rewards_train/1-w": 3.0383477210998535, "rewards_train/2-2": 2.5507662296295166, "rewards_train/2-w": -1.3013019561767578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.387550354003906, "rewards_train/margins_1": 3.759041666984558, "rewards_train/margins_2": 3.8520681858062744, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -187.07850646972656, "logps_train/policy_1_l": -162.99244689941406, "logps_train/policy_1_w": -135.17462158203125, "logps_train/policy_2_2": -128.0684814453125, "logps_train/policy_2_w": -212.2987060546875, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.9001597166061401, "rewards_train/1-l": -1.8266863822937012, "rewards_train/1-w": 3.0442569255828857, "rewards_train/2-2": 3.0904183387756348, "rewards_train/2-w": -1.9439337253570557, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.870943307876587, "rewards_train/margins_1": 3.944416642189026, "rewards_train/margins_2": 5.03435206413269, "step": 433 }, { "epoch": 1.3, "logps_train/policy_1_2": -160.627685546875, "logps_train/policy_1_l": -117.73480224609375, "logps_train/policy_1_w": -98.62950897216797, "logps_train/policy_2_2": -100.342529296875, "logps_train/policy_2_w": -158.60972595214844, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -98.5, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.7908921241760254, "rewards_train/1-l": -1.91029691696167, "rewards_train/1-w": 2.6261115074157715, "rewards_train/2-2": 2.3579347133636475, "rewards_train/2-w": -1.451206922531128, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.536408424377441, "rewards_train/margins_1": 4.417003631591797, "rewards_train/margins_2": 3.8091416358947754, "step": 433 }, { "epoch": 1.3, "learning_rate": 1.5148425564932085e-06, "loss": 0.6927, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -151.836181640625, "logps_train/policy_1_l": -160.94671630859375, "logps_train/policy_1_w": -81.64617919921875, "logps_train/policy_2_2": -91.88685607910156, "logps_train/policy_2_w": -141.2832489013672, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -1.3783445358276367, "rewards_train/1-l": -2.019768714904785, "rewards_train/1-w": 2.4924135208129883, "rewards_train/2-2": 2.425572633743286, "rewards_train/2-w": -1.1877000331878662, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.512182235717773, "rewards_train/margins_1": 3.870758056640625, "rewards_train/margins_2": 3.6132726669311523, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -159.58648681640625, "logps_train/policy_1_l": -158.2130126953125, "logps_train/policy_1_w": -92.77416229248047, "logps_train/policy_2_2": -102.70826721191406, "logps_train/policy_2_w": -146.18727111816406, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -118.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -1.3522040843963623, "rewards_train/1-l": -2.1783814430236816, "rewards_train/1-w": 2.59250545501709, "rewards_train/2-2": 2.5813217163085938, "rewards_train/2-w": -0.9493917226791382, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.7708868980407715, "rewards_train/margins_1": 3.944709539413452, "rewards_train/margins_2": 3.530713438987732, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -206.59083557128906, "logps_train/policy_1_l": -212.74267578125, "logps_train/policy_1_w": -160.10845947265625, "logps_train/policy_2_2": -141.61288452148438, "logps_train/policy_2_w": -236.9912567138672, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -0.8809584379196167, "rewards_train/1-l": -2.6616227626800537, "rewards_train/1-w": 3.13759183883667, "rewards_train/2-2": 3.3598053455352783, "rewards_train/2-w": -1.699906349182129, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.799214601516724, "rewards_train/margins_1": 4.018550276756287, "rewards_train/margins_2": 5.059711694717407, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -179.248779296875, "logps_train/policy_1_l": -185.58883666992188, "logps_train/policy_1_w": -92.84851837158203, "logps_train/policy_2_2": -118.83750915527344, "logps_train/policy_2_w": -146.72642517089844, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -0.689721941947937, "rewards_train/1-l": -3.131442070007324, "rewards_train/1-w": 2.3177361488342285, "rewards_train/2-2": 3.2521862983703613, "rewards_train/2-w": -1.3652207851409912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.449178218841553, "rewards_train/margins_1": 3.0074580907821655, "rewards_train/margins_2": 4.6174070835113525, "step": 434 }, { "epoch": 1.3, "logps_train/policy_1_2": -160.4642333984375, "logps_train/policy_1_l": -189.9505615234375, "logps_train/policy_1_w": -144.46319580078125, "logps_train/policy_2_2": -95.9893798828125, "logps_train/policy_2_w": -229.5170440673828, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.5628286600112915, "rewards_train/1-l": -2.9614622592926025, "rewards_train/1-w": 3.4417665004730225, "rewards_train/2-2": 2.428406238555908, "rewards_train/2-w": -2.632953643798828, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.403228759765625, "rewards_train/margins_1": 5.004595160484314, "rewards_train/margins_2": 5.061359882354736, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -170.28329467773438, "logps_train/policy_1_l": -128.18898010253906, "logps_train/policy_1_w": -100.82581329345703, "logps_train/policy_2_2": -106.38388061523438, "logps_train/policy_2_w": -147.6376190185547, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -1.9138767719268799, "rewards_train/1-l": -2.417188882827759, "rewards_train/1-w": 2.5676143169403076, "rewards_train/2-2": 2.5971591472625732, "rewards_train/2-w": -0.6192313432693481, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.984803199768066, "rewards_train/margins_1": 4.4814910888671875, "rewards_train/margins_2": 3.2163904905319214, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -247.2111358642578, "logps_train/policy_1_l": -138.36553955078125, "logps_train/policy_1_w": -113.82759094238281, "logps_train/policy_2_2": -144.6663055419922, "logps_train/policy_2_w": -195.9312286376953, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -2.3445510864257812, "rewards_train/1-l": -2.0009217262268066, "rewards_train/1-w": 3.4531781673431396, "rewards_train/2-2": 3.9515345096588135, "rewards_train/2-w": -2.080622434616089, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.454099893569946, "rewards_train/margins_1": 5.797729253768921, "rewards_train/margins_2": 6.032156944274902, "step": 435 }, { "epoch": 1.3, "logps_train/policy_1_2": -214.33673095703125, "logps_train/policy_1_l": -230.44937133789062, "logps_train/policy_1_w": -132.07762145996094, "logps_train/policy_2_2": -152.30319213867188, "logps_train/policy_2_w": -187.6544952392578, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -1.2340644598007202, "rewards_train/1-l": -2.962709426879883, "rewards_train/1-w": 3.2391138076782227, "rewards_train/2-2": 3.081399440765381, "rewards_train/2-w": -0.6619337797164917, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 6.2018232345581055, "rewards_train/margins_1": 4.473178267478943, "rewards_train/margins_2": 3.7433332204818726, "step": 435 }, { "epoch": 1.31, "learning_rate": 1.49219142678007e-06, "loss": 0.6107, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -188.3167724609375, "logps_train/policy_1_l": -174.96095275878906, "logps_train/policy_1_w": -119.84827423095703, "logps_train/policy_2_2": -122.22752380371094, "logps_train/policy_2_w": -175.18899536132812, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -0.7398807406425476, "rewards_train/1-l": -2.6980483531951904, "rewards_train/1-w": 3.6301140785217285, "rewards_train/2-2": 3.488966703414917, "rewards_train/2-w": -0.0016129612922668457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.328162431716919, "rewards_train/margins_1": 4.369994819164276, "rewards_train/margins_2": 3.490579664707184, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -179.49234008789062, "logps_train/policy_1_l": -215.42984008789062, "logps_train/policy_1_w": -150.19210815429688, "logps_train/policy_2_2": -109.56578826904297, "logps_train/policy_2_w": -250.42523193359375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": -1.8882970809936523, "rewards_train/1-l": -2.692007541656494, "rewards_train/1-w": 3.432547092437744, "rewards_train/2-2": 2.5688118934631348, "rewards_train/2-w": -2.6999459266662598, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.124554634094238, "rewards_train/margins_1": 5.3208441734313965, "rewards_train/margins_2": 5.2687578201293945, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -210.13821411132812, "logps_train/policy_1_l": -224.68118286132812, "logps_train/policy_1_w": -134.42529296875, "logps_train/policy_2_2": -145.7178955078125, "logps_train/policy_2_w": -212.40362548828125, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.0844275951385498, "rewards_train/1-l": -2.4034695625305176, "rewards_train/1-w": 3.528271198272705, "rewards_train/2-2": 3.0822641849517822, "rewards_train/2-w": -1.434894323348999, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.931740760803223, "rewards_train/margins_1": 4.612698793411255, "rewards_train/margins_2": 4.517158508300781, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -147.12454223632812, "logps_train/policy_1_l": -178.50823974609375, "logps_train/policy_1_w": -113.19635009765625, "logps_train/policy_2_2": -97.77484130859375, "logps_train/policy_2_w": -172.61068725585938, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -0.9497590065002441, "rewards_train/1-l": -2.1701602935791016, "rewards_train/1-w": 2.770599842071533, "rewards_train/2-2": 2.0850162506103516, "rewards_train/2-w": -1.1606783866882324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.940760135650635, "rewards_train/margins_1": 3.7203588485717773, "rewards_train/margins_2": 3.245694637298584, "step": 436 }, { "epoch": 1.31, "logps_train/policy_1_2": -110.34773254394531, "logps_train/policy_1_l": -76.97895050048828, "logps_train/policy_1_w": -63.54456329345703, "logps_train/policy_2_2": -65.1285400390625, "logps_train/policy_2_w": -105.18649291992188, "logps_train/ref_1_2": -98.0, "logps_train/ref_1_l": -60.25, "logps_train/ref_1_w": -85.5, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -95.5, "rewards_train/1-2": -1.2082107067108154, "rewards_train/1-l": -1.6816840171813965, "rewards_train/1-w": 2.1807003021240234, "rewards_train/2-2": 1.9785524606704712, "rewards_train/2-w": -0.9668923020362854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.86238431930542, "rewards_train/margins_1": 3.388911008834839, "rewards_train/margins_2": 2.9454447627067566, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -138.56759643554688, "logps_train/policy_1_l": -143.8929443359375, "logps_train/policy_1_w": -91.37362670898438, "logps_train/policy_2_2": -79.87240600585938, "logps_train/policy_2_w": -143.5133056640625, "logps_train/ref_1_2": -122.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -1.6096888780593872, "rewards_train/1-l": -2.41644287109375, "rewards_train/1-w": 2.053457736968994, "rewards_train/2-2": 2.6826815605163574, "rewards_train/2-w": -1.3411741256713867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.469900608062744, "rewards_train/margins_1": 3.6631466150283813, "rewards_train/margins_2": 4.023855686187744, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -186.54510498046875, "logps_train/policy_1_l": -199.80694580078125, "logps_train/policy_1_w": -102.83036804199219, "logps_train/policy_2_2": -133.30490112304688, "logps_train/policy_2_w": -150.682861328125, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -0.5943546295166016, "rewards_train/1-l": -2.9424147605895996, "rewards_train/1-w": 2.775556802749634, "rewards_train/2-2": 3.110135555267334, "rewards_train/2-w": -0.3909429907798767, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.717971563339233, "rewards_train/margins_1": 3.3699114322662354, "rewards_train/margins_2": 3.5010785460472107, "step": 437 }, { "epoch": 1.31, "logps_train/policy_1_2": -137.906005859375, "logps_train/policy_1_l": -81.41903686523438, "logps_train/policy_1_w": -48.05321502685547, "logps_train/policy_2_2": -83.25100708007812, "logps_train/policy_2_w": -85.96355438232422, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -62.25, "logps_train/ref_1_w": -66.0, "logps_train/ref_2_2": -107.5, "logps_train/ref_2_w": -79.0, "rewards_train/1-2": -1.3297125101089478, "rewards_train/1-l": -1.91353440284729, "rewards_train/1-w": 1.8149909973144531, "rewards_train/2-2": 2.398385524749756, "rewards_train/2-w": -0.7170583605766296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.728525400161743, "rewards_train/margins_1": 3.144703507423401, "rewards_train/margins_2": 3.1154438853263855, "step": 437 }, { "epoch": 1.31, "learning_rate": 1.4696386574070203e-06, "loss": 0.5805, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -151.86679077148438, "logps_train/policy_1_l": -168.21385192871094, "logps_train/policy_1_w": -119.77819061279297, "logps_train/policy_2_2": -108.71356201171875, "logps_train/policy_2_w": -176.19410705566406, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.6306238174438477, "rewards_train/1-l": -2.0580060482025146, "rewards_train/1-w": 3.4128057956695557, "rewards_train/2-2": 2.3775691986083984, "rewards_train/2-w": -0.6065201759338379, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.47081184387207, "rewards_train/margins_1": 4.043429613113403, "rewards_train/margins_2": 2.9840893745422363, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -265.831298828125, "logps_train/policy_1_l": -194.35333251953125, "logps_train/policy_1_w": -119.04186248779297, "logps_train/policy_2_2": -168.57620239257812, "logps_train/policy_2_w": -196.15985107421875, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -3.4815666675567627, "rewards_train/1-l": -2.774199962615967, "rewards_train/1-w": 3.12100887298584, "rewards_train/2-2": 3.0494112968444824, "rewards_train/2-w": -1.8159860372543335, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.895208835601807, "rewards_train/margins_1": 6.6025755405426025, "rewards_train/margins_2": 4.865397334098816, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -219.76771545410156, "logps_train/policy_1_l": -196.81298828125, "logps_train/policy_1_w": -135.1979522705078, "logps_train/policy_2_2": -150.2665557861328, "logps_train/policy_2_w": -204.2064208984375, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.3783328533172607, "rewards_train/1-l": -1.9823744297027588, "rewards_train/1-w": 3.18293833732605, "rewards_train/2-2": 3.3956103324890137, "rewards_train/2-w": -1.3171263933181763, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.165312767028809, "rewards_train/margins_1": 4.5612711906433105, "rewards_train/margins_2": 4.71273672580719, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -164.60679626464844, "logps_train/policy_1_l": -154.54872131347656, "logps_train/policy_1_w": -112.66534423828125, "logps_train/policy_2_2": -112.89852905273438, "logps_train/policy_2_w": -163.9658660888672, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.5989614725112915, "rewards_train/1-l": -1.9478410482406616, "rewards_train/1-w": 2.36940336227417, "rewards_train/2-2": 2.784756660461426, "rewards_train/2-w": -1.0038130283355713, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.3172444105148315, "rewards_train/margins_1": 2.9683648347854614, "rewards_train/margins_2": 3.788569688796997, "step": 438 }, { "epoch": 1.31, "logps_train/policy_1_2": -233.92759704589844, "logps_train/policy_1_l": -215.00782775878906, "logps_train/policy_1_w": -120.55353546142578, "logps_train/policy_2_2": -145.65237426757812, "logps_train/policy_2_w": -214.0526123046875, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -2.970104217529297, "rewards_train/1-l": -2.7931652069091797, "rewards_train/1-w": 3.066131114959717, "rewards_train/2-2": 2.861715316772461, "rewards_train/2-w": -2.582117795944214, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.8592963218688965, "rewards_train/margins_1": 6.036235332489014, "rewards_train/margins_2": 5.443833112716675, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -202.93310546875, "logps_train/policy_1_l": -190.27099609375, "logps_train/policy_1_w": -122.29440307617188, "logps_train/policy_2_2": -133.0540771484375, "logps_train/policy_2_w": -182.29852294921875, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -2.000342845916748, "rewards_train/1-l": -2.063720226287842, "rewards_train/1-w": 2.633841037750244, "rewards_train/2-2": 2.6831674575805664, "rewards_train/2-w": -1.3245790004730225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.697561264038086, "rewards_train/margins_1": 4.634183883666992, "rewards_train/margins_2": 4.007746458053589, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -154.84701538085938, "logps_train/policy_1_l": -124.95621490478516, "logps_train/policy_1_w": -114.1417236328125, "logps_train/policy_2_2": -88.52891540527344, "logps_train/policy_2_w": -194.98529052734375, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -2.3839211463928223, "rewards_train/1-l": -2.003140926361084, "rewards_train/1-w": 2.8407106399536133, "rewards_train/2-2": 2.0356831550598145, "rewards_train/2-w": -2.6479434967041016, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.843851566314697, "rewards_train/margins_1": 5.2246317863464355, "rewards_train/margins_2": 4.683626651763916, "step": 439 }, { "epoch": 1.31, "logps_train/policy_1_2": -163.69358825683594, "logps_train/policy_1_l": -146.89263916015625, "logps_train/policy_1_w": -130.83255004882812, "logps_train/policy_2_2": -105.51878356933594, "logps_train/policy_2_w": -208.10098266601562, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.2221907377243042, "rewards_train/1-l": -2.0248591899871826, "rewards_train/1-w": 3.044283390045166, "rewards_train/2-2": 2.3986105918884277, "rewards_train/2-w": -1.718496322631836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.069142580032349, "rewards_train/margins_1": 4.26647412776947, "rewards_train/margins_2": 4.117106914520264, "step": 439 }, { "epoch": 1.32, "learning_rate": 1.447186449484593e-06, "loss": 0.6624, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -196.70619201660156, "logps_train/policy_1_l": -199.91502380371094, "logps_train/policy_1_w": -132.41073608398438, "logps_train/policy_2_2": -121.0833969116211, "logps_train/policy_2_w": -213.501708984375, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.525306224822998, "rewards_train/1-l": -2.0323843955993652, "rewards_train/1-w": 3.055021286010742, "rewards_train/2-2": 3.3432226181030273, "rewards_train/2-w": -1.9845468997955322, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.087405681610107, "rewards_train/margins_1": 4.58032751083374, "rewards_train/margins_2": 5.32776951789856, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -195.05081176757812, "logps_train/policy_1_l": -170.67681884765625, "logps_train/policy_1_w": -151.80752563476562, "logps_train/policy_2_2": -130.00173950195312, "logps_train/policy_2_w": -220.84095764160156, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.2845747470855713, "rewards_train/1-l": -2.53135347366333, "rewards_train/1-w": 3.3986921310424805, "rewards_train/2-2": 3.340744972229004, "rewards_train/2-w": -1.3438611030578613, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.9300456047058105, "rewards_train/margins_1": 4.683266878128052, "rewards_train/margins_2": 4.684606075286865, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -142.49496459960938, "logps_train/policy_1_l": -180.7201690673828, "logps_train/policy_1_w": -131.34922790527344, "logps_train/policy_2_2": -93.00215911865234, "logps_train/policy_2_w": -194.64979553222656, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.5159035325050354, "rewards_train/1-l": -3.2426223754882812, "rewards_train/1-w": 2.952773094177246, "rewards_train/2-2": 2.8138463497161865, "rewards_train/2-w": -1.1578505039215088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.195395469665527, "rewards_train/margins_1": 3.4686766266822815, "rewards_train/margins_2": 3.9716968536376953, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -248.26019287109375, "logps_train/policy_1_l": -229.2958984375, "logps_train/policy_1_w": -154.19412231445312, "logps_train/policy_2_2": -174.38229370117188, "logps_train/policy_2_w": -236.92269897460938, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.6920366287231445, "rewards_train/1-l": -3.4295897483825684, "rewards_train/1-w": 3.47355580329895, "rewards_train/2-2": 3.49692702293396, "rewards_train/2-w": -1.886800765991211, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.9031455516815186, "rewards_train/margins_1": 5.165592432022095, "rewards_train/margins_2": 5.383727788925171, "step": 440 }, { "epoch": 1.32, "logps_train/policy_1_2": -209.30709838867188, "logps_train/policy_1_l": -175.52362060546875, "logps_train/policy_1_w": -121.67365264892578, "logps_train/policy_2_2": -131.46974182128906, "logps_train/policy_2_w": -185.40521240234375, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.7670378684997559, "rewards_train/1-l": -2.4820985794067383, "rewards_train/1-w": 2.8986501693725586, "rewards_train/2-2": 3.4655251502990723, "rewards_train/2-w": -0.9891532063484192, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.380748748779297, "rewards_train/margins_1": 4.6656880378723145, "rewards_train/margins_2": 4.4546783566474915, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -190.18887329101562, "logps_train/policy_1_l": -189.587646484375, "logps_train/policy_1_w": -110.63607788085938, "logps_train/policy_2_2": -118.64913177490234, "logps_train/policy_2_w": -177.93560791015625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -2.04193377494812, "rewards_train/1-l": -2.511620044708252, "rewards_train/1-w": 3.0664708614349365, "rewards_train/2-2": 2.4866490364074707, "rewards_train/2-w": -1.4669990539550781, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.5780909061431885, "rewards_train/margins_1": 5.108404636383057, "rewards_train/margins_2": 3.953648090362549, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -235.59481811523438, "logps_train/policy_1_l": -188.87338256835938, "logps_train/policy_1_w": -114.16065979003906, "logps_train/policy_2_2": -152.46463012695312, "logps_train/policy_2_w": -190.22491455078125, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.930967092514038, "rewards_train/1-l": -2.2000327110290527, "rewards_train/1-w": 3.1306142807006836, "rewards_train/2-2": 3.5859575271606445, "rewards_train/2-w": -1.4181945323944092, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.330646991729736, "rewards_train/margins_1": 5.061581373214722, "rewards_train/margins_2": 5.004152059555054, "step": 441 }, { "epoch": 1.32, "logps_train/policy_1_2": -186.5133514404297, "logps_train/policy_1_l": -213.35629272460938, "logps_train/policy_1_w": -113.8573226928711, "logps_train/policy_2_2": -122.64290618896484, "logps_train/policy_2_w": -177.11988830566406, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.8056325912475586, "rewards_train/1-l": -3.121957778930664, "rewards_train/1-w": 2.8519630432128906, "rewards_train/2-2": 2.509146213531494, "rewards_train/2-w": -1.1010513305664062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.973920822143555, "rewards_train/margins_1": 4.657595634460449, "rewards_train/margins_2": 3.6101975440979004, "step": 441 }, { "epoch": 1.32, "learning_rate": 1.4248369943086997e-06, "loss": 0.5053, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -208.56552124023438, "logps_train/policy_1_l": -154.27818298339844, "logps_train/policy_1_w": -83.94361114501953, "logps_train/policy_2_2": -127.89459991455078, "logps_train/policy_2_w": -150.49273681640625, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -2.168271541595459, "rewards_train/1-l": -2.348325729370117, "rewards_train/1-w": 2.6986074447631836, "rewards_train/2-2": 3.3003835678100586, "rewards_train/2-w": -1.8160704374313354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.046933174133301, "rewards_train/margins_1": 4.866878986358643, "rewards_train/margins_2": 5.116454005241394, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -121.715576171875, "logps_train/policy_1_l": -152.68292236328125, "logps_train/policy_1_w": -57.44670867919922, "logps_train/policy_2_2": -68.12644958496094, "logps_train/policy_2_w": -113.4579849243164, "logps_train/ref_1_2": -108.5, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": -1.3137449026107788, "rewards_train/1-l": -2.39085054397583, "rewards_train/1-w": 2.2527902126312256, "rewards_train/2-2": 2.3244643211364746, "rewards_train/2-w": -1.5881810188293457, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.643640756607056, "rewards_train/margins_1": 3.5665351152420044, "rewards_train/margins_2": 3.9126453399658203, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -127.18954467773438, "logps_train/policy_1_l": -85.92377471923828, "logps_train/policy_1_w": -83.65664672851562, "logps_train/policy_2_2": -70.01798248291016, "logps_train/policy_2_w": -149.3300018310547, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -96.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -1.151766300201416, "rewards_train/1-l": -1.458003044128418, "rewards_train/1-w": 2.677987575531006, "rewards_train/2-2": 2.6411707401275635, "rewards_train/2-w": -1.409172534942627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.135990619659424, "rewards_train/margins_1": 3.829753875732422, "rewards_train/margins_2": 4.05034327507019, "step": 442 }, { "epoch": 1.32, "logps_train/policy_1_2": -118.56474304199219, "logps_train/policy_1_l": -152.86770629882812, "logps_train/policy_1_w": -106.9164047241211, "logps_train/policy_2_2": -74.08500671386719, "logps_train/policy_2_w": -167.67410278320312, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.6361626386642456, "rewards_train/1-l": -2.3545451164245605, "rewards_train/1-w": 3.016953229904175, "rewards_train/2-2": 2.2536091804504395, "rewards_train/2-w": -1.0845983028411865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.371498346328735, "rewards_train/margins_1": 3.6531158685684204, "rewards_train/margins_2": 3.338207483291626, "step": 442 }, { "epoch": 1.33, "logps_train/policy_1_2": -128.49981689453125, "logps_train/policy_1_l": -143.5393829345703, "logps_train/policy_1_w": -114.63996887207031, "logps_train/policy_2_2": -83.16315460205078, "logps_train/policy_2_w": -173.3150177001953, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.11902397871017456, "rewards_train/1-l": -1.9819657802581787, "rewards_train/1-w": 3.204948663711548, "rewards_train/2-2": 2.7750909328460693, "rewards_train/2-w": -0.40415817499160767, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.186914443969727, "rewards_train/margins_1": 3.3239726424217224, "rewards_train/margins_2": 3.179249107837677, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -145.6349639892578, "logps_train/policy_1_l": -173.69680786132812, "logps_train/policy_1_w": -123.30197143554688, "logps_train/policy_2_2": -100.6165771484375, "logps_train/policy_2_w": -184.51522827148438, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -0.40353527665138245, "rewards_train/1-l": -2.0436549186706543, "rewards_train/1-w": 3.1387486457824707, "rewards_train/2-2": 2.4930291175842285, "rewards_train/2-w": -0.8956637978553772, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.182403564453125, "rewards_train/margins_1": 3.542283922433853, "rewards_train/margins_2": 3.3886929154396057, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -132.59556579589844, "logps_train/policy_1_l": -98.17173767089844, "logps_train/policy_1_w": -66.29495239257812, "logps_train/policy_2_2": -76.01492309570312, "logps_train/policy_2_w": -122.92357635498047, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -77.0, "logps_train/ref_1_w": -87.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -106.5, "rewards_train/1-2": -1.9797227382659912, "rewards_train/1-l": -2.0990097522735596, "rewards_train/1-w": 2.085543632507324, "rewards_train/2-2": 1.9121065139770508, "rewards_train/2-w": -1.6445059776306152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.184553384780884, "rewards_train/margins_1": 4.065266370773315, "rewards_train/margins_2": 3.556612491607666, "step": 443 }, { "epoch": 1.33, "logps_train/policy_1_2": -204.27963256835938, "logps_train/policy_1_l": -213.02952575683594, "logps_train/policy_1_w": -151.6259307861328, "logps_train/policy_2_2": -127.92716217041016, "logps_train/policy_2_w": -246.90225219726562, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.7664401531219482, "rewards_train/1-l": -2.4683828353881836, "rewards_train/1-w": 3.8248093128204346, "rewards_train/2-2": 3.0727133750915527, "rewards_train/2-w": -2.1043858528137207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.293192148208618, "rewards_train/margins_1": 5.591249465942383, "rewards_train/margins_2": 5.177099227905273, "step": 443 }, { "epoch": 1.33, "learning_rate": 1.402592473146766e-06, "loss": 0.5983, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -192.18768310546875, "logps_train/policy_1_l": -191.90501403808594, "logps_train/policy_1_w": -136.78607177734375, "logps_train/policy_2_2": -122.56993865966797, "logps_train/policy_2_w": -199.89016723632812, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.6203303337097168, "rewards_train/1-l": -2.667454719543457, "rewards_train/1-w": 2.4797921180725098, "rewards_train/2-2": 2.8230843544006348, "rewards_train/2-w": -1.5999534130096436, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.147246837615967, "rewards_train/margins_1": 4.100122451782227, "rewards_train/margins_2": 4.423037767410278, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -171.5643768310547, "logps_train/policy_1_l": -161.59295654296875, "logps_train/policy_1_w": -106.43778991699219, "logps_train/policy_2_2": -116.25175476074219, "logps_train/policy_2_w": -163.19662475585938, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.8224037289619446, "rewards_train/1-l": -1.7980167865753174, "rewards_train/1-w": 2.8162307739257812, "rewards_train/2-2": 2.8799033164978027, "rewards_train/2-w": -0.8868489265441895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.614247560501099, "rewards_train/margins_1": 3.638634502887726, "rewards_train/margins_2": 3.766752243041992, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -272.1820983886719, "logps_train/policy_1_l": -253.3179931640625, "logps_train/policy_1_w": -144.12191772460938, "logps_train/policy_2_2": -169.66798400878906, "logps_train/policy_2_w": -240.281494140625, "logps_train/ref_1_2": -243.0, "logps_train/ref_1_l": -229.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -2.944380760192871, "rewards_train/1-l": -2.4241342544555664, "rewards_train/1-w": 3.09601092338562, "rewards_train/2-2": 3.216404438018799, "rewards_train/2-w": -2.9820570945739746, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.5201451778411865, "rewards_train/margins_1": 6.040391683578491, "rewards_train/margins_2": 6.198461532592773, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -140.7806396484375, "logps_train/policy_1_l": -113.53251647949219, "logps_train/policy_1_w": -96.65791320800781, "logps_train/policy_2_2": -88.59971618652344, "logps_train/policy_2_w": -159.30612182617188, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -0.7454946637153625, "rewards_train/1-l": -1.6336477994918823, "rewards_train/1-w": 2.743584394454956, "rewards_train/2-2": 2.6625874042510986, "rewards_train/2-w": -1.4165496826171875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.377232193946838, "rewards_train/margins_1": 3.4890790581703186, "rewards_train/margins_2": 4.079137086868286, "step": 444 }, { "epoch": 1.33, "logps_train/policy_1_2": -236.50750732421875, "logps_train/policy_1_l": -194.6469268798828, "logps_train/policy_1_w": -141.5247344970703, "logps_train/policy_2_2": -151.44790649414062, "logps_train/policy_2_w": -221.94992065429688, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -2.037471294403076, "rewards_train/1-l": -2.7475051879882812, "rewards_train/1-w": 3.449479818344116, "rewards_train/2-2": 3.3356778621673584, "rewards_train/2-w": -1.765108346939087, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.1969850063323975, "rewards_train/margins_1": 5.486951112747192, "rewards_train/margins_2": 5.100786209106445, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -280.742431640625, "logps_train/policy_1_l": -216.6013641357422, "logps_train/policy_1_w": -138.19680786132812, "logps_train/policy_2_2": -172.28097534179688, "logps_train/policy_2_w": -214.1106414794922, "logps_train/ref_1_2": -245.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -3.59494686126709, "rewards_train/1-l": -2.8495900630950928, "rewards_train/1-w": 3.0607879161834717, "rewards_train/2-2": 3.864920139312744, "rewards_train/2-w": -1.589970588684082, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.9103779792785645, "rewards_train/margins_1": 6.6557347774505615, "rewards_train/margins_2": 5.454890727996826, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -204.12420654296875, "logps_train/policy_1_l": -209.903076171875, "logps_train/policy_1_w": -124.03855895996094, "logps_train/policy_2_2": -124.76789093017578, "logps_train/policy_2_w": -214.03524780273438, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -2.1522626876831055, "rewards_train/1-l": -3.11765193939209, "rewards_train/1-w": 3.571974515914917, "rewards_train/2-2": 3.0767269134521484, "rewards_train/2-w": -1.9048919677734375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.689626455307007, "rewards_train/margins_1": 5.7242372035980225, "rewards_train/margins_2": 4.981618881225586, "step": 445 }, { "epoch": 1.33, "logps_train/policy_1_2": -170.96250915527344, "logps_train/policy_1_l": -192.28372192382812, "logps_train/policy_1_w": -118.66523742675781, "logps_train/policy_2_2": -119.5419921875, "logps_train/policy_2_w": -177.69400024414062, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.8673447370529175, "rewards_train/1-l": -2.3161160945892334, "rewards_train/1-w": 3.087285041809082, "rewards_train/2-2": 2.5989255905151367, "rewards_train/2-w": -0.7005521655082703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.403401136398315, "rewards_train/margins_1": 3.9546297788619995, "rewards_train/margins_2": 3.299477756023407, "step": 445 }, { "epoch": 1.34, "learning_rate": 1.3804550570248431e-06, "loss": 0.4433, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -158.9452362060547, "logps_train/policy_1_l": -190.5544891357422, "logps_train/policy_1_w": -137.03929138183594, "logps_train/policy_2_2": -97.2158203125, "logps_train/policy_2_w": -218.10916137695312, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.1628823280334473, "rewards_train/1-l": -2.3652148246765137, "rewards_train/1-w": 3.867946147918701, "rewards_train/2-2": 2.767871379852295, "rewards_train/2-w": -1.464041829109192, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.233160972595215, "rewards_train/margins_1": 5.030828475952148, "rewards_train/margins_2": 4.231913208961487, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -148.5208282470703, "logps_train/policy_1_l": -143.10861206054688, "logps_train/policy_1_w": -95.14697265625, "logps_train/policy_2_2": -92.47824096679688, "logps_train/policy_2_w": -145.55934143066406, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.2518877983093262, "rewards_train/1-l": -1.7833220958709717, "rewards_train/1-w": 2.550976514816284, "rewards_train/2-2": 2.4375758171081543, "rewards_train/2-w": -0.7952407002449036, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.334298610687256, "rewards_train/margins_1": 3.8028643131256104, "rewards_train/margins_2": 3.232816517353058, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -174.66921997070312, "logps_train/policy_1_l": -175.52719116210938, "logps_train/policy_1_w": -106.30473327636719, "logps_train/policy_2_2": -106.94361877441406, "logps_train/policy_2_w": -175.71202087402344, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.0872342586517334, "rewards_train/1-l": -2.7767422199249268, "rewards_train/1-w": 2.8672804832458496, "rewards_train/2-2": 3.2853260040283203, "rewards_train/2-w": -1.523056983947754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.644022703170776, "rewards_train/margins_1": 3.954514741897583, "rewards_train/margins_2": 4.808382987976074, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -213.1461944580078, "logps_train/policy_1_l": -185.57546997070312, "logps_train/policy_1_w": -113.7623062133789, "logps_train/policy_2_2": -143.97589111328125, "logps_train/policy_2_w": -180.04119873046875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.4966497421264648, "rewards_train/1-l": -1.89621901512146, "rewards_train/1-w": 3.194667339324951, "rewards_train/2-2": 3.4399096965789795, "rewards_train/2-w": -1.4248230457305908, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.090886354446411, "rewards_train/margins_1": 4.691317081451416, "rewards_train/margins_2": 4.86473274230957, "step": 446 }, { "epoch": 1.34, "logps_train/policy_1_2": -189.72821044921875, "logps_train/policy_1_l": -197.758544921875, "logps_train/policy_1_w": -108.3488540649414, "logps_train/policy_2_2": -121.65826416015625, "logps_train/policy_2_w": -181.16754150390625, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -2.0105175971984863, "rewards_train/1-l": -2.4514408111572266, "rewards_train/1-w": 3.0803489685058594, "rewards_train/2-2": 2.700385093688965, "rewards_train/2-w": -1.2722222805023193, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.531789779663086, "rewards_train/margins_1": 5.090866565704346, "rewards_train/margins_2": 3.972607374191284, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -146.77430725097656, "logps_train/policy_1_l": -139.11044311523438, "logps_train/policy_1_w": -91.5942611694336, "logps_train/policy_2_2": -82.23889923095703, "logps_train/policy_2_w": -150.97900390625, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -1.5569223165512085, "rewards_train/1-l": -1.9747169017791748, "rewards_train/1-w": 2.4798319339752197, "rewards_train/2-2": 2.6501336097717285, "rewards_train/2-w": -1.3691898584365845, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.4545488357543945, "rewards_train/margins_1": 4.036754250526428, "rewards_train/margins_2": 4.019323468208313, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -192.43780517578125, "logps_train/policy_1_l": -190.35452270507812, "logps_train/policy_1_w": -125.65946960449219, "logps_train/policy_2_2": -129.06475830078125, "logps_train/policy_2_w": -200.62765502929688, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.1746407747268677, "rewards_train/1-l": -2.0333034992218018, "rewards_train/1-w": 3.289522171020508, "rewards_train/2-2": 2.96110200881958, "rewards_train/2-w": -1.3666727542877197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.32282567024231, "rewards_train/margins_1": 4.4641629457473755, "rewards_train/margins_2": 4.3277747631073, "step": 447 }, { "epoch": 1.34, "logps_train/policy_1_2": -232.3035430908203, "logps_train/policy_1_l": -191.47097778320312, "logps_train/policy_1_w": -128.1549530029297, "logps_train/policy_2_2": -149.88604736328125, "logps_train/policy_2_w": -223.59262084960938, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -2.169417381286621, "rewards_train/1-l": -3.05198073387146, "rewards_train/1-w": 3.414973258972168, "rewards_train/2-2": 3.352019786834717, "rewards_train/2-w": -2.704575300216675, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.466953992843628, "rewards_train/margins_1": 5.584390640258789, "rewards_train/margins_2": 6.056595087051392, "step": 447 }, { "epoch": 1.34, "learning_rate": 1.3584269065157175e-06, "loss": 0.5363, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -179.06976318359375, "logps_train/policy_1_l": -152.75299072265625, "logps_train/policy_1_w": -116.53547668457031, "logps_train/policy_2_2": -123.53411865234375, "logps_train/policy_2_w": -182.12889099121094, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.004632830619812, "rewards_train/1-l": -1.7501517534255981, "rewards_train/1-w": 3.156998634338379, "rewards_train/2-2": 2.9426825046539307, "rewards_train/2-w": -1.0804672241210938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.907150387763977, "rewards_train/margins_1": 4.161631464958191, "rewards_train/margins_2": 4.023149728775024, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -195.7481231689453, "logps_train/policy_1_l": -169.91671752929688, "logps_train/policy_1_w": -126.8497085571289, "logps_train/policy_2_2": -133.2751007080078, "logps_train/policy_2_w": -200.39793395996094, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.9836013317108154, "rewards_train/1-l": -2.7051494121551514, "rewards_train/1-w": 3.6458888053894043, "rewards_train/2-2": 2.9902634620666504, "rewards_train/2-w": -0.8374499082565308, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.351038217544556, "rewards_train/margins_1": 4.62949013710022, "rewards_train/margins_2": 3.827713370323181, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -237.15093994140625, "logps_train/policy_1_l": -181.23858642578125, "logps_train/policy_1_w": -108.06492614746094, "logps_train/policy_2_2": -164.24053955078125, "logps_train/policy_2_w": -177.50308227539062, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.631500244140625, "rewards_train/1-l": -2.2939274311065674, "rewards_train/1-w": 3.022413730621338, "rewards_train/2-2": 3.0519230365753174, "rewards_train/2-w": -1.1776505708694458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.316341161727905, "rewards_train/margins_1": 4.653913974761963, "rewards_train/margins_2": 4.229573607444763, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -158.007568359375, "logps_train/policy_1_l": -165.85536193847656, "logps_train/policy_1_w": -115.17674255371094, "logps_train/policy_2_2": -98.387939453125, "logps_train/policy_2_w": -191.0347137451172, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.4925527572631836, "rewards_train/1-l": -2.006800651550293, "rewards_train/1-w": 2.91318416595459, "rewards_train/2-2": 2.4701900482177734, "rewards_train/2-w": -2.052689552307129, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.919984817504883, "rewards_train/margins_1": 4.405736923217773, "rewards_train/margins_2": 4.522879600524902, "step": 448 }, { "epoch": 1.34, "logps_train/policy_1_2": -158.863525390625, "logps_train/policy_1_l": -176.49749755859375, "logps_train/policy_1_w": -130.13514709472656, "logps_train/policy_2_2": -105.78216552734375, "logps_train/policy_2_w": -208.330810546875, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -0.9386952519416809, "rewards_train/1-l": -2.2282662391662598, "rewards_train/1-w": 3.3310165405273438, "rewards_train/2-2": 2.6499085426330566, "rewards_train/2-w": -1.514331579208374, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.5592827796936035, "rewards_train/margins_1": 4.269711792469025, "rewards_train/margins_2": 4.164240121841431, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -285.923095703125, "logps_train/policy_1_l": -239.7527313232422, "logps_train/policy_1_w": -173.12266540527344, "logps_train/policy_2_2": -183.8560028076172, "logps_train/policy_2_w": -286.90911865234375, "logps_train/ref_1_2": -258.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -255.0, "rewards_train/1-2": -2.8110599517822266, "rewards_train/1-l": -2.8674607276916504, "rewards_train/1-w": 3.819765090942383, "rewards_train/2-2": 3.8397908210754395, "rewards_train/2-w": -3.2034125328063965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.687225818634033, "rewards_train/margins_1": 6.630825042724609, "rewards_train/margins_2": 7.043203353881836, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -147.76519775390625, "logps_train/policy_1_l": -181.57028198242188, "logps_train/policy_1_w": -88.57540893554688, "logps_train/policy_2_2": -104.2777328491211, "logps_train/policy_2_w": -127.04328918457031, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -0.6442440152168274, "rewards_train/1-l": -2.3036584854125977, "rewards_train/1-w": 2.39514422416687, "rewards_train/2-2": 2.3371193408966064, "rewards_train/2-w": -0.15889494121074677, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.698802709579468, "rewards_train/margins_1": 3.0393882393836975, "rewards_train/margins_2": 2.496014282107353, "step": 449 }, { "epoch": 1.34, "logps_train/policy_1_2": -147.91529846191406, "logps_train/policy_1_l": -197.931884765625, "logps_train/policy_1_w": -94.29685974121094, "logps_train/policy_2_2": -96.71660614013672, "logps_train/policy_2_w": -148.21197509765625, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.8868425488471985, "rewards_train/1-l": -2.5470452308654785, "rewards_train/1-w": 2.611720561981201, "rewards_train/2-2": 2.5244333744049072, "rewards_train/2-w": -0.7133849859237671, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.15876579284668, "rewards_train/margins_1": 3.4985631108283997, "rewards_train/margins_2": 3.2378183603286743, "step": 449 }, { "epoch": 1.35, "learning_rate": 1.3365101715280473e-06, "loss": 0.5051, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -187.001708984375, "logps_train/policy_1_l": -189.97743225097656, "logps_train/policy_1_w": -121.81301879882812, "logps_train/policy_2_2": -112.94446563720703, "logps_train/policy_2_w": -209.91412353515625, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.7786860466003418, "rewards_train/1-l": -2.5484261512756348, "rewards_train/1-w": 3.4175264835357666, "rewards_train/2-2": 2.993443250656128, "rewards_train/2-w": -2.424225330352783, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.965952634811401, "rewards_train/margins_1": 5.196212530136108, "rewards_train/margins_2": 5.417668581008911, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -183.12123107910156, "logps_train/policy_1_l": -148.75518798828125, "logps_train/policy_1_w": -111.08756256103516, "logps_train/policy_2_2": -119.89420318603516, "logps_train/policy_2_w": -165.4687042236328, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.8487450480461121, "rewards_train/1-l": -2.1111631393432617, "rewards_train/1-w": 3.2056963443756104, "rewards_train/2-2": 3.104720115661621, "rewards_train/2-w": -0.3203073740005493, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.316859483718872, "rewards_train/margins_1": 4.054441392421722, "rewards_train/margins_2": 3.4250274896621704, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -173.42559814453125, "logps_train/policy_1_l": -165.39556884765625, "logps_train/policy_1_w": -114.73516845703125, "logps_train/policy_2_2": -106.28524780273438, "logps_train/policy_2_w": -182.67532348632812, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.6281061172485352, "rewards_train/1-l": -2.7160730361938477, "rewards_train/1-w": 3.0489439964294434, "rewards_train/2-2": 2.942446708679199, "rewards_train/2-w": -1.6155800819396973, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.765017032623291, "rewards_train/margins_1": 4.6770501136779785, "rewards_train/margins_2": 4.5580267906188965, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -173.66766357421875, "logps_train/policy_1_l": -173.7141876220703, "logps_train/policy_1_w": -105.93343353271484, "logps_train/policy_2_2": -107.20770263671875, "logps_train/policy_2_w": -167.06649780273438, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.8628603219985962, "rewards_train/1-l": -2.3917009830474854, "rewards_train/1-w": 2.8152506351470947, "rewards_train/2-2": 3.284698486328125, "rewards_train/2-w": -0.9190512895584106, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.20695161819458, "rewards_train/margins_1": 3.678110957145691, "rewards_train/margins_2": 4.203749775886536, "step": 450 }, { "epoch": 1.35, "logps_train/policy_1_2": -311.28558349609375, "logps_train/policy_1_l": -294.93524169921875, "logps_train/policy_1_w": -122.8981704711914, "logps_train/policy_2_2": -203.99221801757812, "logps_train/policy_2_w": -206.09298706054688, "logps_train/ref_1_2": -282.0, "logps_train/ref_1_l": -262.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -247.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -3.0273852348327637, "rewards_train/1-l": -3.239816188812256, "rewards_train/1-w": 3.2238547801971436, "rewards_train/2-2": 4.286716461181641, "rewards_train/2-w": -1.7270712852478027, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.463670969009399, "rewards_train/margins_1": 6.251240015029907, "rewards_train/margins_2": 6.013787746429443, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -163.96725463867188, "logps_train/policy_1_l": -150.99325561523438, "logps_train/policy_1_w": -151.1488037109375, "logps_train/policy_2_2": -114.0860595703125, "logps_train/policy_2_w": -235.15133666992188, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -0.7873502969741821, "rewards_train/1-l": -1.9260835647583008, "rewards_train/1-w": 3.67183780670166, "rewards_train/2-2": 2.572643756866455, "rewards_train/2-w": -2.055757999420166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.597921371459961, "rewards_train/margins_1": 4.459188103675842, "rewards_train/margins_2": 4.628401756286621, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -231.7354736328125, "logps_train/policy_1_l": -235.31231689453125, "logps_train/policy_1_w": -172.9691162109375, "logps_train/policy_2_2": -155.6427001953125, "logps_train/policy_2_w": -252.9477996826172, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -211.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": -0.9997178316116333, "rewards_train/1-l": -2.7996890544891357, "rewards_train/1-w": 3.79547119140625, "rewards_train/2-2": 4.091785430908203, "rewards_train/2-w": -1.553375005722046, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.595160245895386, "rewards_train/margins_1": 4.795189023017883, "rewards_train/margins_2": 5.645160436630249, "step": 451 }, { "epoch": 1.35, "logps_train/policy_1_2": -119.03262329101562, "logps_train/policy_1_l": -111.72762298583984, "logps_train/policy_1_w": -67.33026123046875, "logps_train/policy_2_2": -80.6257095336914, "logps_train/policy_2_w": -105.00759887695312, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -95.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -101.5, "rewards_train/1-2": -0.7495514750480652, "rewards_train/1-l": -1.616609811782837, "rewards_train/1-w": 1.8888497352600098, "rewards_train/2-2": 1.8382105827331543, "rewards_train/2-w": -0.3659936189651489, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.5054595470428467, "rewards_train/margins_1": 2.638401210308075, "rewards_train/margins_2": 2.2042042016983032, "step": 451 }, { "epoch": 1.35, "learning_rate": 1.31470699109653e-06, "loss": 0.5044, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -140.077392578125, "logps_train/policy_1_l": -141.72091674804688, "logps_train/policy_1_w": -76.81234741210938, "logps_train/policy_2_2": -87.91201782226562, "logps_train/policy_2_w": -123.08654022216797, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -105.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": -1.5434808731079102, "rewards_train/1-l": -1.443161964416504, "rewards_train/1-w": 1.8880033493041992, "rewards_train/2-2": 1.743173360824585, "rewards_train/2-w": -0.9082642793655396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.331165313720703, "rewards_train/margins_1": 3.4314842224121094, "rewards_train/margins_2": 2.6514376401901245, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -176.64544677734375, "logps_train/policy_1_l": -174.5198974609375, "logps_train/policy_1_w": -112.83454895019531, "logps_train/policy_2_2": -116.46196746826172, "logps_train/policy_2_w": -179.68960571289062, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -0.7650330066680908, "rewards_train/1-l": -2.606678009033203, "rewards_train/1-w": 3.0380287170410156, "rewards_train/2-2": 2.9283156394958496, "rewards_train/2-w": -1.3121249675750732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.644706726074219, "rewards_train/margins_1": 3.8030617237091064, "rewards_train/margins_2": 4.240440607070923, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -164.3592529296875, "logps_train/policy_1_l": -212.1265869140625, "logps_train/policy_1_w": -155.2235565185547, "logps_train/policy_2_2": -112.78492736816406, "logps_train/policy_2_w": -219.79022216796875, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.7533081769943237, "rewards_train/1-l": -3.051037311553955, "rewards_train/1-w": 3.4760818481445312, "rewards_train/2-2": 2.6990461349487305, "rewards_train/2-w": -0.7378127574920654, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.527119159698486, "rewards_train/margins_1": 4.229390025138855, "rewards_train/margins_2": 3.436858892440796, "step": 452 }, { "epoch": 1.35, "logps_train/policy_1_2": -193.933837890625, "logps_train/policy_1_l": -143.00283813476562, "logps_train/policy_1_w": -127.68162536621094, "logps_train/policy_2_2": -113.83546447753906, "logps_train/policy_2_w": -210.40518188476562, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.9961180686950684, "rewards_train/1-l": -2.3211822509765625, "rewards_train/1-w": 3.1037137508392334, "rewards_train/2-2": 3.2318830490112305, "rewards_train/2-w": -2.3388586044311523, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.424896001815796, "rewards_train/margins_1": 5.099831819534302, "rewards_train/margins_2": 5.570741653442383, "step": 452 }, { "epoch": 1.36, "logps_train/policy_1_2": -195.01089477539062, "logps_train/policy_1_l": -173.58941650390625, "logps_train/policy_1_w": -136.97279357910156, "logps_train/policy_2_2": -130.39309692382812, "logps_train/policy_2_w": -206.31387329101562, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.0592910051345825, "rewards_train/1-l": -2.0897035598754883, "rewards_train/1-w": 2.8408055305480957, "rewards_train/2-2": 3.305222511291504, "rewards_train/2-w": -1.2042393684387207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.930509090423584, "rewards_train/margins_1": 3.9000965356826782, "rewards_train/margins_2": 4.509461879730225, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -211.02188110351562, "logps_train/policy_1_l": -176.2194366455078, "logps_train/policy_1_w": -98.13252258300781, "logps_train/policy_2_2": -132.4381103515625, "logps_train/policy_2_w": -178.357421875, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -2.321720600128174, "rewards_train/1-l": -1.9211621284484863, "rewards_train/1-w": 2.9484667778015137, "rewards_train/2-2": 3.1684930324554443, "rewards_train/2-w": -2.318751335144043, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.86962890625, "rewards_train/margins_1": 5.2701873779296875, "rewards_train/margins_2": 5.487244367599487, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -152.51495361328125, "logps_train/policy_1_l": -141.8433837890625, "logps_train/policy_1_w": -111.59954071044922, "logps_train/policy_2_2": -86.54838562011719, "logps_train/policy_2_w": -188.93048095703125, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.7065739631652832, "rewards_train/1-l": -2.6071019172668457, "rewards_train/1-w": 2.9389710426330566, "rewards_train/2-2": 2.6732864379882812, "rewards_train/2-w": -2.1173653602600098, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.546072959899902, "rewards_train/margins_1": 4.64554500579834, "rewards_train/margins_2": 4.790651798248291, "step": 453 }, { "epoch": 1.36, "logps_train/policy_1_2": -188.7757568359375, "logps_train/policy_1_l": -144.3535919189453, "logps_train/policy_1_w": -123.28413391113281, "logps_train/policy_2_2": -135.33035278320312, "logps_train/policy_2_w": -186.89132690429688, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.0584360361099243, "rewards_train/1-l": -2.236629009246826, "rewards_train/1-w": 3.1104531288146973, "rewards_train/2-2": 2.7029991149902344, "rewards_train/2-w": -1.5129612684249878, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.347082138061523, "rewards_train/margins_1": 4.168889164924622, "rewards_train/margins_2": 4.215960383415222, "step": 453 }, { "epoch": 1.36, "learning_rate": 1.2930194931731382e-06, "loss": 0.6037, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -177.64324951171875, "logps_train/policy_1_l": -188.95046997070312, "logps_train/policy_1_w": -100.83192443847656, "logps_train/policy_2_2": -119.64583587646484, "logps_train/policy_2_w": -169.6533966064453, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.4084653854370117, "rewards_train/1-l": -2.4048118591308594, "rewards_train/1-w": 3.104600667953491, "rewards_train/2-2": 2.435220956802368, "rewards_train/2-w": -1.3522531986236572, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.509412527084351, "rewards_train/margins_1": 4.513066053390503, "rewards_train/margins_2": 3.7874741554260254, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -206.25294494628906, "logps_train/policy_1_l": -150.9187469482422, "logps_train/policy_1_w": -95.97833251953125, "logps_train/policy_2_2": -135.30088806152344, "logps_train/policy_2_w": -171.47955322265625, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -2.2803714275360107, "rewards_train/1-l": -1.7630671262741089, "rewards_train/1-w": 2.9439640045166016, "rewards_train/2-2": 2.6932506561279297, "rewards_train/2-w": -2.243072509765625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7070311307907104, "rewards_train/margins_1": 5.224335432052612, "rewards_train/margins_2": 4.936323165893555, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -162.3838653564453, "logps_train/policy_1_l": -157.62161254882812, "logps_train/policy_1_w": -92.30644226074219, "logps_train/policy_2_2": -111.92584991455078, "logps_train/policy_2_w": -151.69296264648438, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.0200273990631104, "rewards_train/1-l": -2.037893295288086, "rewards_train/1-w": 2.8650588989257812, "rewards_train/2-2": 2.530852794647217, "rewards_train/2-w": -1.1243736743927002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.902952194213867, "rewards_train/margins_1": 3.8850862979888916, "rewards_train/margins_2": 3.655226469039917, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -205.75164794921875, "logps_train/policy_1_l": -182.31610107421875, "logps_train/policy_1_w": -135.145263671875, "logps_train/policy_2_2": -142.84130859375, "logps_train/policy_2_w": -205.54193115234375, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.4204752445220947, "rewards_train/1-l": -2.3431339263916016, "rewards_train/1-w": 3.801098108291626, "rewards_train/2-2": 3.016651153564453, "rewards_train/2-w": -0.9614204168319702, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.1442320346832275, "rewards_train/margins_1": 5.221573352813721, "rewards_train/margins_2": 3.9780715703964233, "step": 454 }, { "epoch": 1.36, "logps_train/policy_1_2": -185.0052490234375, "logps_train/policy_1_l": -227.04409790039062, "logps_train/policy_1_w": -131.14126586914062, "logps_train/policy_2_2": -121.12335205078125, "logps_train/policy_2_w": -202.4043731689453, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.0009158849716187, "rewards_train/1-l": -2.251821279525757, "rewards_train/1-w": 3.476499080657959, "rewards_train/2-2": 3.108367443084717, "rewards_train/2-w": -0.9490313529968262, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.728320360183716, "rewards_train/margins_1": 4.477414965629578, "rewards_train/margins_2": 4.057398796081543, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -171.70309448242188, "logps_train/policy_1_l": -178.3574981689453, "logps_train/policy_1_w": -128.5065460205078, "logps_train/policy_2_2": -114.09322357177734, "logps_train/policy_2_w": -192.31869506835938, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.3326135873794556, "rewards_train/1-l": -2.212996244430542, "rewards_train/1-w": 3.192901134490967, "rewards_train/2-2": 2.422318458557129, "rewards_train/2-w": -1.4109700918197632, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.405897378921509, "rewards_train/margins_1": 4.525514721870422, "rewards_train/margins_2": 3.833288550376892, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -178.66616821289062, "logps_train/policy_1_l": -214.5784912109375, "logps_train/policy_1_w": -115.07865905761719, "logps_train/policy_2_2": -115.34834289550781, "logps_train/policy_2_w": -186.8712158203125, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.4697415828704834, "rewards_train/1-l": -3.6301164627075195, "rewards_train/1-w": 2.885004997253418, "rewards_train/2-2": 2.8093068599700928, "rewards_train/2-w": -1.620911955833435, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.5151214599609375, "rewards_train/margins_1": 4.354746580123901, "rewards_train/margins_2": 4.430218815803528, "step": 455 }, { "epoch": 1.36, "logps_train/policy_1_2": -181.23805236816406, "logps_train/policy_1_l": -216.968017578125, "logps_train/policy_1_w": -119.29254150390625, "logps_train/policy_2_2": -130.86102294921875, "logps_train/policy_2_w": -177.78384399414062, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.279273122549057, "rewards_train/1-l": -2.4796128273010254, "rewards_train/1-w": 3.08597993850708, "rewards_train/2-2": 3.0260071754455566, "rewards_train/2-w": -0.5740877389907837, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.5655927658081055, "rewards_train/margins_1": 3.365253061056137, "rewards_train/margins_2": 3.6000949144363403, "step": 455 }, { "epoch": 1.37, "learning_rate": 1.2714497944194376e-06, "loss": 0.5829, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -156.44918823242188, "logps_train/policy_1_l": -150.22555541992188, "logps_train/policy_1_w": -87.62406921386719, "logps_train/policy_2_2": -96.29612731933594, "logps_train/policy_2_w": -137.87759399414062, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -1.6214807033538818, "rewards_train/1-l": -2.6018524169921875, "rewards_train/1-w": 2.686030387878418, "rewards_train/2-2": 2.566481113433838, "rewards_train/2-w": -0.8162745237350464, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.2878828048706055, "rewards_train/margins_1": 4.3075110912323, "rewards_train/margins_2": 3.3827556371688843, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -217.85928344726562, "logps_train/policy_1_l": -254.1007080078125, "logps_train/policy_1_w": -147.20230102539062, "logps_train/policy_2_2": -137.83978271484375, "logps_train/policy_2_w": -218.96682739257812, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -221.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.9870991706848145, "rewards_train/1-l": -3.3393678665161133, "rewards_train/1-w": 3.4981296062469482, "rewards_train/2-2": 3.4199278354644775, "rewards_train/2-w": -1.2250025272369385, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.8374974727630615, "rewards_train/margins_1": 5.485228776931763, "rewards_train/margins_2": 4.644930362701416, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -226.92840576171875, "logps_train/policy_1_l": -149.38711547851562, "logps_train/policy_1_w": -119.98979187011719, "logps_train/policy_2_2": -136.80047607421875, "logps_train/policy_2_w": -204.23406982421875, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -2.3920602798461914, "rewards_train/1-l": -2.0525782108306885, "rewards_train/1-w": 3.4615681171417236, "rewards_train/2-2": 3.4504222869873047, "rewards_train/2-w": -2.1030941009521484, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.514146327972412, "rewards_train/margins_1": 5.853628396987915, "rewards_train/margins_2": 5.553516387939453, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -142.71109008789062, "logps_train/policy_1_l": -111.6103515625, "logps_train/policy_1_w": -74.66256713867188, "logps_train/policy_2_2": -99.09512329101562, "logps_train/policy_2_w": -113.40216827392578, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": -0.6203710436820984, "rewards_train/1-l": -1.7929139137268066, "rewards_train/1-w": 2.6102633476257324, "rewards_train/2-2": 2.561124324798584, "rewards_train/2-w": -0.028565824031829834, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.403177261352539, "rewards_train/margins_1": 3.230634391307831, "rewards_train/margins_2": 2.589690148830414, "step": 456 }, { "epoch": 1.37, "logps_train/policy_1_2": -188.99588012695312, "logps_train/policy_1_l": -203.67391967773438, "logps_train/policy_1_w": -86.67901611328125, "logps_train/policy_2_2": -124.88639831542969, "logps_train/policy_2_w": -136.34036254882812, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -1.7038846015930176, "rewards_train/1-l": -2.875546455383301, "rewards_train/1-w": 2.6380069255828857, "rewards_train/2-2": 2.6336255073547363, "rewards_train/2-w": -0.515482485294342, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.5135533809661865, "rewards_train/margins_1": 4.341891527175903, "rewards_train/margins_2": 3.1491079926490784, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -207.4479217529297, "logps_train/policy_1_l": -162.23388671875, "logps_train/policy_1_w": -115.86399841308594, "logps_train/policy_2_2": -134.05796813964844, "logps_train/policy_2_w": -188.06597900390625, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -2.083073377609253, "rewards_train/1-l": -2.3255364894866943, "rewards_train/1-w": 3.461256504058838, "rewards_train/2-2": 2.6719374656677246, "rewards_train/2-w": -0.8345285058021545, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.786792993545532, "rewards_train/margins_1": 5.544329881668091, "rewards_train/margins_2": 3.506465971469879, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -213.31361389160156, "logps_train/policy_1_l": -155.3280029296875, "logps_train/policy_1_w": -96.95127868652344, "logps_train/policy_2_2": -127.02633666992188, "logps_train/policy_2_w": -173.04937744140625, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -2.530580997467041, "rewards_train/1-l": -2.0306029319763184, "rewards_train/1-w": 2.6388566493988037, "rewards_train/2-2": 3.028615951538086, "rewards_train/2-w": -2.1981992721557617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.669459581375122, "rewards_train/margins_1": 5.169437646865845, "rewards_train/margins_2": 5.226815223693848, "step": 457 }, { "epoch": 1.37, "logps_train/policy_1_2": -224.15626525878906, "logps_train/policy_1_l": -191.8662567138672, "logps_train/policy_1_w": -88.52113342285156, "logps_train/policy_2_2": -136.6419677734375, "logps_train/policy_2_w": -152.12319946289062, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -3.106642246246338, "rewards_train/1-l": -2.860600233078003, "rewards_train/1-w": 2.463413715362549, "rewards_train/2-2": 3.1549441814422607, "rewards_train/2-w": -1.735074758529663, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.324013948440552, "rewards_train/margins_1": 5.570055961608887, "rewards_train/margins_2": 4.890018939971924, "step": 457 }, { "epoch": 1.37, "learning_rate": 1.2500000000000007e-06, "loss": 0.5589, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -131.49880981445312, "logps_train/policy_1_l": -157.32733154296875, "logps_train/policy_1_w": -95.1653060913086, "logps_train/policy_2_2": -83.10081481933594, "logps_train/policy_2_w": -145.10134887695312, "logps_train/ref_1_2": -123.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -0.8078895211219788, "rewards_train/1-l": -2.4059765338897705, "rewards_train/1-w": 2.5899150371551514, "rewards_train/2-2": 2.109987258911133, "rewards_train/2-w": -0.9644312858581543, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.995891571044922, "rewards_train/margins_1": 3.39780455827713, "rewards_train/margins_2": 3.074418544769287, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -163.965087890625, "logps_train/policy_1_l": -132.89207458496094, "logps_train/policy_1_w": -105.02593994140625, "logps_train/policy_2_2": -99.0322265625, "logps_train/policy_2_w": -170.10006713867188, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -114.5, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.2449469566345215, "rewards_train/1-l": -1.8318835496902466, "rewards_train/1-w": 3.0981860160827637, "rewards_train/2-2": 2.8381834030151367, "rewards_train/2-w": -1.452976107597351, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.93006956577301, "rewards_train/margins_1": 4.343132972717285, "rewards_train/margins_2": 4.291159510612488, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -211.5887451171875, "logps_train/policy_1_l": -172.11068725585938, "logps_train/policy_1_w": -129.40216064453125, "logps_train/policy_2_2": -137.59518432617188, "logps_train/policy_2_w": -184.42007446289062, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.451843023300171, "rewards_train/1-l": -2.6444671154022217, "rewards_train/1-w": 2.519354820251465, "rewards_train/2-2": 3.581888437271118, "rewards_train/2-w": -1.1240389347076416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.1638219356536865, "rewards_train/margins_1": 3.9711978435516357, "rewards_train/margins_2": 4.70592737197876, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -203.88003540039062, "logps_train/policy_1_l": -137.95217895507812, "logps_train/policy_1_w": -95.93070983886719, "logps_train/policy_2_2": -136.22113037109375, "logps_train/policy_2_w": -154.93063354492188, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -1.7165186405181885, "rewards_train/1-l": -1.8670426607131958, "rewards_train/1-w": 2.5521929264068604, "rewards_train/2-2": 2.7774956226348877, "rewards_train/2-w": -1.339938759803772, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.419235587120056, "rewards_train/margins_1": 4.268711566925049, "rewards_train/margins_2": 4.11743438243866, "step": 458 }, { "epoch": 1.37, "logps_train/policy_1_2": -91.61935424804688, "logps_train/policy_1_l": -100.18289184570312, "logps_train/policy_1_w": -60.593013763427734, "logps_train/policy_2_2": -47.23656463623047, "logps_train/policy_2_w": -103.4302978515625, "logps_train/ref_1_2": -80.0, "logps_train/ref_1_l": -80.0, "logps_train/ref_1_w": -80.0, "logps_train/ref_2_2": -67.0, "logps_train/ref_2_w": -94.5, "rewards_train/1-2": -1.171701192855835, "rewards_train/1-l": -1.9948759078979492, "rewards_train/1-w": 1.9076907634735107, "rewards_train/2-2": 1.9771249294281006, "rewards_train/2-w": -0.8994752764701843, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.90256667137146, "rewards_train/margins_1": 3.0793919563293457, "rewards_train/margins_2": 2.876600205898285, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -176.10174560546875, "logps_train/policy_1_l": -186.49562072753906, "logps_train/policy_1_w": -126.63906860351562, "logps_train/policy_2_2": -119.71650695800781, "logps_train/policy_2_w": -183.01895141601562, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.0164246559143066, "rewards_train/1-l": -2.4800314903259277, "rewards_train/1-w": 3.2267181873321533, "rewards_train/2-2": 2.956082820892334, "rewards_train/2-w": -0.3979891538619995, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.706749677658081, "rewards_train/margins_1": 4.24314284324646, "rewards_train/margins_2": 3.3540719747543335, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -196.04896545410156, "logps_train/policy_1_l": -184.4102783203125, "logps_train/policy_1_w": -119.05310821533203, "logps_train/policy_2_2": -120.10248565673828, "logps_train/policy_2_w": -200.23300170898438, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -2.0228657722473145, "rewards_train/1-l": -2.9692494869232178, "rewards_train/1-w": 3.4871702194213867, "rewards_train/2-2": 3.054008960723877, "rewards_train/2-w": -1.673493504524231, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.4564197063446045, "rewards_train/margins_1": 5.510035991668701, "rewards_train/margins_2": 4.727502465248108, "step": 459 }, { "epoch": 1.37, "logps_train/policy_1_2": -266.9113464355469, "logps_train/policy_1_l": -195.23220825195312, "logps_train/policy_1_w": -140.40248107910156, "logps_train/policy_2_2": -186.5640869140625, "logps_train/policy_2_w": -210.51901245117188, "logps_train/ref_1_2": -250.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -222.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.725704312324524, "rewards_train/1-l": -2.2040798664093018, "rewards_train/1-w": 3.0302605628967285, "rewards_train/2-2": 3.5728888511657715, "rewards_train/2-w": -1.2401831150054932, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.23434042930603, "rewards_train/margins_1": 4.755964875221252, "rewards_train/margins_2": 4.813071966171265, "step": 459 }, { "epoch": 1.38, "learning_rate": 1.2286722033769494e-06, "loss": 0.6491, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -155.98353576660156, "logps_train/policy_1_l": -164.1321563720703, "logps_train/policy_1_w": -135.0337677001953, "logps_train/policy_2_2": -94.72055053710938, "logps_train/policy_2_w": -208.51776123046875, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.1436659097671509, "rewards_train/1-l": -2.4986162185668945, "rewards_train/1-w": 2.8309006690979004, "rewards_train/2-2": 2.64786696434021, "rewards_train/2-w": -1.854607343673706, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.329516887664795, "rewards_train/margins_1": 3.9745665788650513, "rewards_train/margins_2": 4.502474308013916, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -150.30050659179688, "logps_train/policy_1_l": -131.8756561279297, "logps_train/policy_1_w": -78.37809753417969, "logps_train/policy_2_2": -101.52391052246094, "logps_train/policy_2_w": -129.96044921875, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -124.0, "rewards_train/1-2": -0.9103255271911621, "rewards_train/1-l": -2.1699628829956055, "rewards_train/1-w": 2.572737455368042, "rewards_train/2-2": 2.3618671894073486, "rewards_train/2-w": -0.5944818258285522, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.7427003383636475, "rewards_train/margins_1": 3.483062982559204, "rewards_train/margins_2": 2.956349015235901, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -161.66213989257812, "logps_train/policy_1_l": -128.04428100585938, "logps_train/policy_1_w": -96.07323455810547, "logps_train/policy_2_2": -95.85359191894531, "logps_train/policy_2_w": -161.85787963867188, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -121.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -2.0525429248809814, "rewards_train/1-l": -1.9200283288955688, "rewards_train/1-w": 2.6912851333618164, "rewards_train/2-2": 2.5189366340637207, "rewards_train/2-w": -1.820650577545166, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.611313462257385, "rewards_train/margins_1": 4.743828058242798, "rewards_train/margins_2": 4.339587211608887, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -159.91998291015625, "logps_train/policy_1_l": -163.63796997070312, "logps_train/policy_1_w": -105.5362548828125, "logps_train/policy_2_2": -95.32915496826172, "logps_train/policy_2_w": -173.29000854492188, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.3541563749313354, "rewards_train/1-l": -2.6247358322143555, "rewards_train/1-w": 2.998767137527466, "rewards_train/2-2": 2.7648868560791016, "rewards_train/2-w": -1.624118685722351, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.623502969741821, "rewards_train/margins_1": 4.352923512458801, "rewards_train/margins_2": 4.389005541801453, "step": 460 }, { "epoch": 1.38, "logps_train/policy_1_2": -183.25527954101562, "logps_train/policy_1_l": -172.4656219482422, "logps_train/policy_1_w": -82.66014099121094, "logps_train/policy_2_2": -121.7091293334961, "logps_train/policy_2_w": -142.40309143066406, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -108.5, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -1.3692779541015625, "rewards_train/1-l": -1.9671679735183716, "rewards_train/1-w": 2.5762710571289062, "rewards_train/2-2": 2.5884621143341064, "rewards_train/2-w": -0.9377697706222534, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.543439030647278, "rewards_train/margins_1": 3.9455490112304688, "rewards_train/margins_2": 3.52623188495636, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -249.58621215820312, "logps_train/policy_1_l": -199.9772186279297, "logps_train/policy_1_w": -121.30258178710938, "logps_train/policy_2_2": -166.59825134277344, "logps_train/policy_2_w": -183.66819763183594, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -2.0883078575134277, "rewards_train/1-l": -2.0276050567626953, "rewards_train/1-w": 3.0322422981262207, "rewards_train/2-2": 3.7171287536621094, "rewards_train/2-w": -1.2140860557556152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.059847354888916, "rewards_train/margins_1": 5.120550155639648, "rewards_train/margins_2": 4.931214809417725, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -118.86019897460938, "logps_train/policy_1_l": -117.48360443115234, "logps_train/policy_1_w": -82.11366271972656, "logps_train/policy_2_2": -71.31974792480469, "logps_train/policy_2_w": -149.04611206054688, "logps_train/ref_1_2": -104.5, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -1.425081729888916, "rewards_train/1-l": -1.783125877380371, "rewards_train/1-w": 2.0495712757110596, "rewards_train/2-2": 1.5595650672912598, "rewards_train/2-w": -2.3698458671569824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8326971530914307, "rewards_train/margins_1": 3.4746530055999756, "rewards_train/margins_2": 3.929410934448242, "step": 461 }, { "epoch": 1.38, "logps_train/policy_1_2": -170.69000244140625, "logps_train/policy_1_l": -124.6068115234375, "logps_train/policy_1_w": -59.77308654785156, "logps_train/policy_2_2": -112.59908294677734, "logps_train/policy_2_w": -96.6024169921875, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -80.5, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -94.0, "rewards_train/1-2": -1.4346261024475098, "rewards_train/1-l": -1.839001178741455, "rewards_train/1-w": 2.066392421722412, "rewards_train/2-2": 2.466263771057129, "rewards_train/2-w": -0.2550659477710724, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.905393600463867, "rewards_train/margins_1": 3.501018524169922, "rewards_train/margins_2": 2.7213297188282013, "step": 461 }, { "epoch": 1.38, "learning_rate": 1.207468486105636e-06, "loss": 0.6529, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -208.01980590820312, "logps_train/policy_1_l": -199.04234313964844, "logps_train/policy_1_w": -115.17958068847656, "logps_train/policy_2_2": -131.8028564453125, "logps_train/policy_2_w": -192.5200653076172, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.994558334350586, "rewards_train/1-l": -2.484605312347412, "rewards_train/1-w": 2.9316515922546387, "rewards_train/2-2": 2.9486329555511475, "rewards_train/2-w": -1.9868087768554688, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.416256904602051, "rewards_train/margins_1": 4.926209926605225, "rewards_train/margins_2": 4.935441732406616, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -207.22882080078125, "logps_train/policy_1_l": -181.63592529296875, "logps_train/policy_1_w": -141.5785675048828, "logps_train/policy_2_2": -148.63204956054688, "logps_train/policy_2_w": -195.87771606445312, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.2517884969711304, "rewards_train/1-l": -2.473991870880127, "rewards_train/1-w": 3.2647995948791504, "rewards_train/2-2": 2.917067766189575, "rewards_train/2-w": -0.30378663539886475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.738791465759277, "rewards_train/margins_1": 4.516588091850281, "rewards_train/margins_2": 3.22085440158844, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -167.11624145507812, "logps_train/policy_1_l": -158.71397399902344, "logps_train/policy_1_w": -118.75068664550781, "logps_train/policy_2_2": -98.14094543457031, "logps_train/policy_2_w": -187.79397583007812, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -2.1864166259765625, "rewards_train/1-l": -2.6244125366210938, "rewards_train/1-w": 2.985722303390503, "rewards_train/2-2": 2.4913742542266846, "rewards_train/2-w": -1.5932656526565552, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.610134840011597, "rewards_train/margins_1": 5.172138929367065, "rewards_train/margins_2": 4.08463990688324, "step": 462 }, { "epoch": 1.38, "logps_train/policy_1_2": -261.54132080078125, "logps_train/policy_1_l": -185.71389770507812, "logps_train/policy_1_w": -100.05328369140625, "logps_train/policy_2_2": -168.59512329101562, "logps_train/policy_2_w": -172.03858947753906, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -2.6049137115478516, "rewards_train/1-l": -2.551931858062744, "rewards_train/1-w": 3.028607130050659, "rewards_train/2-2": 3.885800838470459, "rewards_train/2-w": -1.439772367477417, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.580538988113403, "rewards_train/margins_1": 5.633520841598511, "rewards_train/margins_2": 5.325573205947876, "step": 462 }, { "epoch": 1.39, "logps_train/policy_1_2": -129.0125732421875, "logps_train/policy_1_l": -204.74610900878906, "logps_train/policy_1_w": -125.80050659179688, "logps_train/policy_2_2": -93.44357299804688, "logps_train/policy_2_w": -200.58493041992188, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -112.5, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -0.5243039131164551, "rewards_train/1-l": -2.333254098892212, "rewards_train/1-w": 2.922976493835449, "rewards_train/2-2": 1.9269320964813232, "rewards_train/2-w": -1.551071047782898, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.256230592727661, "rewards_train/margins_1": 3.4472804069519043, "rewards_train/margins_2": 3.478003144264221, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -166.97784423828125, "logps_train/policy_1_l": -144.1201171875, "logps_train/policy_1_w": -118.47853088378906, "logps_train/policy_2_2": -103.36260986328125, "logps_train/policy_2_w": -193.15951538085938, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.6501283645629883, "rewards_train/1-l": -2.6075191497802734, "rewards_train/1-w": 2.8732411861419678, "rewards_train/2-2": 2.771942615509033, "rewards_train/2-w": -2.1399736404418945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.480760335922241, "rewards_train/margins_1": 4.523369550704956, "rewards_train/margins_2": 4.911916255950928, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -188.13754272460938, "logps_train/policy_1_l": -169.05950927734375, "logps_train/policy_1_w": -125.99497985839844, "logps_train/policy_2_2": -129.41900634765625, "logps_train/policy_2_w": -202.21820068359375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.2082860469818115, "rewards_train/1-l": -2.350921630859375, "rewards_train/1-w": 3.606166362762451, "rewards_train/2-2": 2.6686456203460693, "rewards_train/2-w": -1.582366943359375, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.957087993621826, "rewards_train/margins_1": 4.814452409744263, "rewards_train/margins_2": 4.251012563705444, "step": 463 }, { "epoch": 1.39, "logps_train/policy_1_2": -216.89096069335938, "logps_train/policy_1_l": -230.92686462402344, "logps_train/policy_1_w": -171.88253784179688, "logps_train/policy_2_2": -148.85836791992188, "logps_train/policy_2_w": -252.33575439453125, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -210.0, "logps_train/ref_1_w": -209.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -237.0, "rewards_train/1-2": -1.5431963205337524, "rewards_train/1-l": -2.0486435890197754, "rewards_train/1-w": 3.715115547180176, "rewards_train/2-2": 3.0872108936309814, "rewards_train/2-w": -1.5426585674285889, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.763759136199951, "rewards_train/margins_1": 5.258311867713928, "rewards_train/margins_2": 4.62986946105957, "step": 463 }, { "epoch": 1.39, "learning_rate": 1.1863909176314855e-06, "loss": 0.598, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -251.44296264648438, "logps_train/policy_1_l": -232.97650146484375, "logps_train/policy_1_w": -153.52032470703125, "logps_train/policy_2_2": -168.62945556640625, "logps_train/policy_2_w": -244.93731689453125, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.993516445159912, "rewards_train/1-l": -3.014739751815796, "rewards_train/1-w": 3.950310230255127, "rewards_train/2-2": 3.672211170196533, "rewards_train/2-w": -1.8226377964019775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.965049982070923, "rewards_train/margins_1": 5.943826675415039, "rewards_train/margins_2": 5.494848966598511, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -236.86001586914062, "logps_train/policy_1_l": -164.98939514160156, "logps_train/policy_1_w": -109.92153930664062, "logps_train/policy_2_2": -140.2073974609375, "logps_train/policy_2_w": -177.26141357421875, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -2.5445966720581055, "rewards_train/1-l": -2.5567522048950195, "rewards_train/1-w": 2.828011989593506, "rewards_train/2-2": 3.853869915008545, "rewards_train/2-w": -1.363641619682312, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.384764194488525, "rewards_train/margins_1": 5.372608661651611, "rewards_train/margins_2": 5.217511534690857, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -148.79563903808594, "logps_train/policy_1_l": -162.7332763671875, "logps_train/policy_1_w": -110.58834838867188, "logps_train/policy_2_2": -102.08114624023438, "logps_train/policy_2_w": -159.97000122070312, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.6701889038085938, "rewards_train/1-l": -2.9645392894744873, "rewards_train/1-w": 2.221243381500244, "rewards_train/2-2": 2.5164947509765625, "rewards_train/2-w": -1.218092679977417, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.1857826709747314, "rewards_train/margins_1": 2.891432285308838, "rewards_train/margins_2": 3.7345874309539795, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -181.31869506835938, "logps_train/policy_1_l": -155.3297882080078, "logps_train/policy_1_w": -133.46786499023438, "logps_train/policy_2_2": -115.84159851074219, "logps_train/policy_2_w": -208.34104919433594, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -0.9881203174591064, "rewards_train/1-l": -2.8571009635925293, "rewards_train/1-w": 3.7184488773345947, "rewards_train/2-2": 3.593574047088623, "rewards_train/2-w": -0.9891833066940308, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.575549840927124, "rewards_train/margins_1": 4.706569194793701, "rewards_train/margins_2": 4.582757353782654, "step": 464 }, { "epoch": 1.39, "logps_train/policy_1_2": -191.93524169921875, "logps_train/policy_1_l": -179.33883666992188, "logps_train/policy_1_w": -147.97000122070312, "logps_train/policy_2_2": -125.91183471679688, "logps_train/policy_2_w": -231.471435546875, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -1.7456239461898804, "rewards_train/1-l": -2.0758252143859863, "rewards_train/1-w": 3.4885475635528564, "rewards_train/2-2": 2.2635040283203125, "rewards_train/2-w": -1.875267744064331, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.564372777938843, "rewards_train/margins_1": 5.234171509742737, "rewards_train/margins_2": 4.1387717723846436, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -110.4359130859375, "logps_train/policy_1_l": -107.54585266113281, "logps_train/policy_1_w": -76.9555892944336, "logps_train/policy_2_2": -68.3291015625, "logps_train/policy_2_w": -123.57815551757812, "logps_train/ref_1_2": -106.5, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -91.5, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": -0.39652109146118164, "rewards_train/1-l": -1.7520945072174072, "rewards_train/1-w": 2.495065689086914, "rewards_train/2-2": 2.3148441314697266, "rewards_train/2-w": -0.6957060098648071, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.247160196304321, "rewards_train/margins_1": 2.8915867805480957, "rewards_train/margins_2": 3.0105501413345337, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -186.2347412109375, "logps_train/policy_1_l": -173.26901245117188, "logps_train/policy_1_w": -140.88818359375, "logps_train/policy_2_2": -121.84612274169922, "logps_train/policy_2_w": -210.71905517578125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.0933971405029297, "rewards_train/1-l": -2.569187641143799, "rewards_train/1-w": 3.3778810501098633, "rewards_train/2-2": 3.106502056121826, "rewards_train/2-w": -1.4521793127059937, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.947068691253662, "rewards_train/margins_1": 4.471278190612793, "rewards_train/margins_2": 4.55868136882782, "step": 465 }, { "epoch": 1.39, "logps_train/policy_1_2": -192.2891845703125, "logps_train/policy_1_l": -174.11639404296875, "logps_train/policy_1_w": -131.71868896484375, "logps_train/policy_2_2": -136.03890991210938, "logps_train/policy_2_w": -194.69317626953125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -0.8625134825706482, "rewards_train/1-l": -2.5525574684143066, "rewards_train/1-w": 3.143756866455078, "rewards_train/2-2": 2.96856951713562, "rewards_train/2-w": -0.9849432706832886, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.696314334869385, "rewards_train/margins_1": 4.006270349025726, "rewards_train/margins_2": 3.9535127878189087, "step": 465 }, { "epoch": 1.4, "learning_rate": 1.1654415550880245e-06, "loss": 0.6611, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -221.95318603515625, "logps_train/policy_1_l": -180.79119873046875, "logps_train/policy_1_w": -121.51676940917969, "logps_train/policy_2_2": -145.67103576660156, "logps_train/policy_2_w": -196.7718963623047, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.9773504734039307, "rewards_train/1-l": -2.408416748046875, "rewards_train/1-w": 2.9114089012145996, "rewards_train/2-2": 2.8395371437072754, "rewards_train/2-w": -1.7207452058792114, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.319825649261475, "rewards_train/margins_1": 4.88875937461853, "rewards_train/margins_2": 4.560282349586487, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -178.65029907226562, "logps_train/policy_1_l": -159.47378540039062, "logps_train/policy_1_w": -124.31708526611328, "logps_train/policy_2_2": -116.10614013671875, "logps_train/policy_2_w": -189.85867309570312, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.4792879819869995, "rewards_train/1-l": -1.8623671531677246, "rewards_train/1-w": 2.788212776184082, "rewards_train/2-2": 2.929180860519409, "rewards_train/2-w": -1.8132120370864868, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.650579929351807, "rewards_train/margins_1": 4.2675007581710815, "rewards_train/margins_2": 4.742392897605896, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -168.08056640625, "logps_train/policy_1_l": -180.81118774414062, "logps_train/policy_1_w": -122.57247161865234, "logps_train/policy_2_2": -101.26045227050781, "logps_train/policy_2_w": -197.02633666992188, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.5494623184204102, "rewards_train/1-l": -2.4437179565429688, "rewards_train/1-w": 2.9185338020324707, "rewards_train/2-2": 2.8649709224700928, "rewards_train/2-w": -1.6854455471038818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.3622517585754395, "rewards_train/margins_1": 4.467996120452881, "rewards_train/margins_2": 4.550416469573975, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -139.63275146484375, "logps_train/policy_1_l": -125.71427917480469, "logps_train/policy_1_w": -100.13269805908203, "logps_train/policy_2_2": -82.48176574707031, "logps_train/policy_2_w": -150.6753387451172, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -1.0632760524749756, "rewards_train/1-l": -2.1723074913024902, "rewards_train/1-w": 2.8269643783569336, "rewards_train/2-2": 2.811589241027832, "rewards_train/2-w": -0.6251522302627563, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.999271869659424, "rewards_train/margins_1": 3.890240430831909, "rewards_train/margins_2": 3.4367414712905884, "step": 466 }, { "epoch": 1.4, "logps_train/policy_1_2": -135.08218383789062, "logps_train/policy_1_l": -127.28308868408203, "logps_train/policy_1_w": -98.62541198730469, "logps_train/policy_2_2": -83.11507415771484, "logps_train/policy_2_w": -162.00445556640625, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -104.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -1.6496248245239258, "rewards_train/1-l": -2.1519417762756348, "rewards_train/1-w": 2.388141632080078, "rewards_train/2-2": 2.1150553226470947, "rewards_train/2-w": -2.133258581161499, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.540083408355713, "rewards_train/margins_1": 4.037766456604004, "rewards_train/margins_2": 4.248313903808594, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -128.56309509277344, "logps_train/policy_1_l": -135.5459747314453, "logps_train/policy_1_w": -83.0648193359375, "logps_train/policy_2_2": -81.98721313476562, "logps_train/policy_2_w": -115.06453704833984, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -107.5, "logps_train/ref_1_w": -102.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -112.0, "rewards_train/1-2": -1.0060651302337646, "rewards_train/1-l": -2.7879486083984375, "rewards_train/1-w": 1.9624149799346924, "rewards_train/2-2": 2.2932214736938477, "rewards_train/2-w": -0.29048672318458557, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.75036358833313, "rewards_train/margins_1": 2.968480110168457, "rewards_train/margins_2": 2.5837081968784332, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -199.1478729248047, "logps_train/policy_1_l": -174.02047729492188, "logps_train/policy_1_w": -87.1270751953125, "logps_train/policy_2_2": -121.52198028564453, "logps_train/policy_2_w": -154.00868225097656, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.856194257736206, "rewards_train/1-l": -2.6099343299865723, "rewards_train/1-w": 2.71658992767334, "rewards_train/2-2": 3.055223226547241, "rewards_train/2-w": -1.364149570465088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.326524257659912, "rewards_train/margins_1": 4.572784185409546, "rewards_train/margins_2": 4.419372797012329, "step": 467 }, { "epoch": 1.4, "logps_train/policy_1_2": -182.16787719726562, "logps_train/policy_1_l": -178.53817749023438, "logps_train/policy_1_w": -163.48715209960938, "logps_train/policy_2_2": -116.632080078125, "logps_train/policy_2_w": -245.33428955078125, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -1.1253825426101685, "rewards_train/1-l": -1.629866361618042, "rewards_train/1-w": 3.564565658569336, "rewards_train/2-2": 3.0403072834014893, "rewards_train/2-w": -1.6580390930175781, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.194432020187378, "rewards_train/margins_1": 4.689948201179504, "rewards_train/margins_2": 4.698346376419067, "step": 467 }, { "epoch": 1.4, "learning_rate": 1.1446224430961037e-06, "loss": 0.5893, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -214.13059997558594, "logps_train/policy_1_l": -228.34765625, "logps_train/policy_1_w": -117.99691772460938, "logps_train/policy_2_2": -150.19674682617188, "logps_train/policy_2_w": -184.4501953125, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.1670643091201782, "rewards_train/1-l": -3.0717267990112305, "rewards_train/1-w": 2.879018783569336, "rewards_train/2-2": 3.118069648742676, "rewards_train/2-w": -1.0958006381988525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.950745582580566, "rewards_train/margins_1": 4.046083092689514, "rewards_train/margins_2": 4.213870286941528, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -186.23016357421875, "logps_train/policy_1_l": -224.131591796875, "logps_train/policy_1_w": -142.54119873046875, "logps_train/policy_2_2": -117.3088150024414, "logps_train/policy_2_w": -223.91409301757812, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.4417665004730225, "rewards_train/1-l": -2.8511593341827393, "rewards_train/1-w": 3.2867860794067383, "rewards_train/2-2": 2.949000835418701, "rewards_train/2-w": -1.4528354406356812, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.1379454135894775, "rewards_train/margins_1": 4.728552579879761, "rewards_train/margins_2": 4.401836276054382, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -229.4070281982422, "logps_train/policy_1_l": -216.20245361328125, "logps_train/policy_1_w": -143.68148803710938, "logps_train/policy_2_2": -143.55902099609375, "logps_train/policy_2_w": -221.29171752929688, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -2.404179811477661, "rewards_train/1-l": -2.7311835289001465, "rewards_train/1-w": 2.907633066177368, "rewards_train/2-2": 3.4995670318603516, "rewards_train/2-w": -2.2303433418273926, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.638816595077515, "rewards_train/margins_1": 5.311812877655029, "rewards_train/margins_2": 5.729910373687744, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -118.48519897460938, "logps_train/policy_1_l": -176.45263671875, "logps_train/policy_1_w": -87.86949157714844, "logps_train/policy_2_2": -76.03182983398438, "logps_train/policy_2_w": -153.5330047607422, "logps_train/ref_1_2": -112.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -96.5, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -0.6364104151725769, "rewards_train/1-l": -1.8868639469146729, "rewards_train/1-w": 2.910121440887451, "rewards_train/2-2": 2.0300204753875732, "rewards_train/2-w": -1.4540812969207764, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.796985387802124, "rewards_train/margins_1": 3.546531856060028, "rewards_train/margins_2": 3.4841017723083496, "step": 468 }, { "epoch": 1.4, "logps_train/policy_1_2": -198.25714111328125, "logps_train/policy_1_l": -170.33689880371094, "logps_train/policy_1_w": -99.27600860595703, "logps_train/policy_2_2": -121.6180191040039, "logps_train/policy_2_w": -163.72161865234375, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -2.037628412246704, "rewards_train/1-l": -2.636864185333252, "rewards_train/1-w": 3.206969976425171, "rewards_train/2-2": 3.1645658016204834, "rewards_train/2-w": -1.0864198207855225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.843834161758423, "rewards_train/margins_1": 5.244598388671875, "rewards_train/margins_2": 4.250985622406006, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -129.3023223876953, "logps_train/policy_1_l": -157.3857421875, "logps_train/policy_1_w": -107.07816314697266, "logps_train/policy_2_2": -91.90965270996094, "logps_train/policy_2_w": -156.50302124023438, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -0.06773290038108826, "rewards_train/1-l": -2.2893567085266113, "rewards_train/1-w": 2.9535114765167236, "rewards_train/2-2": 2.3893089294433594, "rewards_train/2-w": -0.31104493141174316, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.242868185043335, "rewards_train/margins_1": 3.021244376897812, "rewards_train/margins_2": 2.7003538608551025, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -136.48191833496094, "logps_train/policy_1_l": -136.90878295898438, "logps_train/policy_1_w": -99.06541442871094, "logps_train/policy_2_2": -89.03778076171875, "logps_train/policy_2_w": -156.2979736328125, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.098973035812378, "rewards_train/1-l": -1.9918550252914429, "rewards_train/1-w": 2.5458030700683594, "rewards_train/2-2": 2.2255184650421143, "rewards_train/2-w": -1.2520619630813599, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.537658095359802, "rewards_train/margins_1": 3.6447761058807373, "rewards_train/margins_2": 3.477580428123474, "step": 469 }, { "epoch": 1.4, "logps_train/policy_1_2": -168.65234375, "logps_train/policy_1_l": -189.59182739257812, "logps_train/policy_1_w": -125.12562561035156, "logps_train/policy_2_2": -113.68377685546875, "logps_train/policy_2_w": -179.7824249267578, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.1726559400558472, "rewards_train/1-l": -3.508791923522949, "rewards_train/1-w": 2.663414478302002, "rewards_train/2-2": 2.5675601959228516, "rewards_train/2-w": -0.9782431125640869, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.172206401824951, "rewards_train/margins_1": 3.836070418357849, "rewards_train/margins_2": 3.5458033084869385, "step": 469 }, { "epoch": 1.41, "learning_rate": 1.1239356135643544e-06, "loss": 0.6554, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -226.12673950195312, "logps_train/policy_1_l": -192.4293212890625, "logps_train/policy_1_w": -148.20230102539062, "logps_train/policy_2_2": -141.0983428955078, "logps_train/policy_2_w": -246.08831787109375, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -2.0915799140930176, "rewards_train/1-l": -2.5168564319610596, "rewards_train/1-w": 3.6762535572052, "rewards_train/2-2": 3.4800095558166504, "rewards_train/2-w": -2.6705493927001953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.19310998916626, "rewards_train/margins_1": 5.767833471298218, "rewards_train/margins_2": 6.150558948516846, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -211.46759033203125, "logps_train/policy_1_l": -188.2081298828125, "logps_train/policy_1_w": -123.83003234863281, "logps_train/policy_2_2": -135.96156311035156, "logps_train/policy_2_w": -198.37567138671875, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.5531079769134521, "rewards_train/1-l": -2.2609505653381348, "rewards_train/1-w": 3.469341278076172, "rewards_train/2-2": 2.9817728996276855, "rewards_train/2-w": -1.3031928539276123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.730291843414307, "rewards_train/margins_1": 5.022449254989624, "rewards_train/margins_2": 4.284965753555298, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -250.31617736816406, "logps_train/policy_1_l": -197.8651885986328, "logps_train/policy_1_w": -138.11306762695312, "logps_train/policy_2_2": -145.16845703125, "logps_train/policy_2_w": -228.5552978515625, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -2.9933371543884277, "rewards_train/1-l": -2.4198203086853027, "rewards_train/1-w": 3.322678565979004, "rewards_train/2-2": 3.811278820037842, "rewards_train/2-w": -2.3258421421051025, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.742498874664307, "rewards_train/margins_1": 6.316015720367432, "rewards_train/margins_2": 6.137120962142944, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -137.9121551513672, "logps_train/policy_1_l": -138.97463989257812, "logps_train/policy_1_w": -73.94339752197266, "logps_train/policy_2_2": -79.35934448242188, "logps_train/policy_2_w": -125.25104522705078, "logps_train/ref_1_2": -124.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -97.0, "logps_train/ref_2_2": -102.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -1.3888723850250244, "rewards_train/1-l": -2.5336453914642334, "rewards_train/1-w": 2.305269479751587, "rewards_train/2-2": 2.305471658706665, "rewards_train/2-w": -1.0585036277770996, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.83891487121582, "rewards_train/margins_1": 3.6941418647766113, "rewards_train/margins_2": 3.3639752864837646, "step": 470 }, { "epoch": 1.41, "logps_train/policy_1_2": -213.2816925048828, "logps_train/policy_1_l": -220.51992797851562, "logps_train/policy_1_w": -147.92132568359375, "logps_train/policy_2_2": -141.7377166748047, "logps_train/policy_2_w": -238.19537353515625, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": -1.694966197013855, "rewards_train/1-l": -3.073868751525879, "rewards_train/1-w": 3.275054693222046, "rewards_train/2-2": 2.962946891784668, "rewards_train/2-w": -2.3023505210876465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.348923444747925, "rewards_train/margins_1": 4.970020890235901, "rewards_train/margins_2": 5.2652974128723145, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -187.52749633789062, "logps_train/policy_1_l": -229.9205780029297, "logps_train/policy_1_w": -112.80693817138672, "logps_train/policy_2_2": -112.72726440429688, "logps_train/policy_2_w": -189.90176391601562, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.9707183837890625, "rewards_train/1-l": -2.338932991027832, "rewards_train/1-w": 2.893915891647339, "rewards_train/2-2": 2.52942156791687, "rewards_train/2-w": -2.17767596244812, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.232848882675171, "rewards_train/margins_1": 4.864634275436401, "rewards_train/margins_2": 4.70709753036499, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -193.72097778320312, "logps_train/policy_1_l": -213.49159240722656, "logps_train/policy_1_w": -152.58116149902344, "logps_train/policy_2_2": -144.13262939453125, "logps_train/policy_2_w": -201.46395874023438, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -0.650221586227417, "rewards_train/1-l": -2.8992574214935303, "rewards_train/1-w": 2.8994033336639404, "rewards_train/2-2": 2.8447446823120117, "rewards_train/2-w": -0.5611408352851868, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.798660755157471, "rewards_train/margins_1": 3.5496249198913574, "rewards_train/margins_2": 3.4058855175971985, "step": 471 }, { "epoch": 1.41, "logps_train/policy_1_2": -246.6337890625, "logps_train/policy_1_l": -183.0128173828125, "logps_train/policy_1_w": -136.9193115234375, "logps_train/policy_2_2": -154.8067169189453, "logps_train/policy_2_w": -220.17529296875, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -2.0520505905151367, "rewards_train/1-l": -2.2245240211486816, "rewards_train/1-w": 3.6858036518096924, "rewards_train/2-2": 3.6148366928100586, "rewards_train/2-w": -1.7140135765075684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.910327672958374, "rewards_train/margins_1": 5.737854242324829, "rewards_train/margins_2": 5.328850269317627, "step": 471 }, { "epoch": 1.41, "learning_rate": 1.1033830854908692e-06, "loss": 0.4781, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -244.7804718017578, "logps_train/policy_1_l": -258.4429016113281, "logps_train/policy_1_w": -150.79946899414062, "logps_train/policy_2_2": -174.12387084960938, "logps_train/policy_2_w": -222.626708984375, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -233.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.6811720132827759, "rewards_train/1-l": -2.5040552616119385, "rewards_train/1-w": 3.4427103996276855, "rewards_train/2-2": 3.0284337997436523, "rewards_train/2-w": -1.2587634325027466, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.946765661239624, "rewards_train/margins_1": 5.123882412910461, "rewards_train/margins_2": 4.287197232246399, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -185.49319458007812, "logps_train/policy_1_l": -182.3922576904297, "logps_train/policy_1_w": -114.76160430908203, "logps_train/policy_2_2": -129.0401611328125, "logps_train/policy_2_w": -165.9124755859375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.9734400510787964, "rewards_train/1-l": -2.1552412509918213, "rewards_train/1-w": 2.59298038482666, "rewards_train/2-2": 2.6997923851013184, "rewards_train/2-w": -0.8385136127471924, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.7482216358184814, "rewards_train/margins_1": 3.5664204359054565, "rewards_train/margins_2": 3.5383059978485107, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -138.5817108154297, "logps_train/policy_1_l": -162.07125854492188, "logps_train/policy_1_w": -92.74310302734375, "logps_train/policy_2_2": -84.96212005615234, "logps_train/policy_2_w": -148.82266235351562, "logps_train/ref_1_2": -128.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -1.0640296936035156, "rewards_train/1-l": -2.2036099433898926, "rewards_train/1-w": 2.5164122581481934, "rewards_train/2-2": 2.6647255420684814, "rewards_train/2-w": -1.3574625253677368, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.720022201538086, "rewards_train/margins_1": 3.580441951751709, "rewards_train/margins_2": 4.022188067436218, "step": 472 }, { "epoch": 1.41, "logps_train/policy_1_2": -181.76251220703125, "logps_train/policy_1_l": -237.64683532714844, "logps_train/policy_1_w": -150.827392578125, "logps_train/policy_2_2": -124.66287231445312, "logps_train/policy_2_w": -234.2788848876953, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -0.3832824230194092, "rewards_train/1-l": -2.9008164405822754, "rewards_train/1-w": 2.8781018257141113, "rewards_train/2-2": 3.4430882930755615, "rewards_train/2-w": -2.3983964920043945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.778918266296387, "rewards_train/margins_1": 3.2613842487335205, "rewards_train/margins_2": 5.841484785079956, "step": 472 }, { "epoch": 1.42, "logps_train/policy_1_2": -246.45736694335938, "logps_train/policy_1_l": -221.4786376953125, "logps_train/policy_1_w": -157.82574462890625, "logps_train/policy_2_2": -169.70755004882812, "logps_train/policy_2_w": -234.15850830078125, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -211.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -1.0418310165405273, "rewards_train/1-l": -3.073596477508545, "rewards_train/1-w": 3.7217226028442383, "rewards_train/2-2": 4.153659820556641, "rewards_train/2-w": -1.381083369255066, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.795319080352783, "rewards_train/margins_1": 4.763553619384766, "rewards_train/margins_2": 5.5347431898117065, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -152.11318969726562, "logps_train/policy_1_l": -145.41656494140625, "logps_train/policy_1_w": -106.73776245117188, "logps_train/policy_2_2": -93.81387329101562, "logps_train/policy_2_w": -176.44247436523438, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.1972565650939941, "rewards_train/1-l": -2.1518123149871826, "rewards_train/1-w": 2.969778060913086, "rewards_train/2-2": 2.33013653755188, "rewards_train/2-w": -1.5840904712677002, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.1215903759002686, "rewards_train/margins_1": 4.16703462600708, "rewards_train/margins_2": 3.91422700881958, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -175.65762329101562, "logps_train/policy_1_l": -175.569580078125, "logps_train/policy_1_w": -99.79045104980469, "logps_train/policy_2_2": -108.56393432617188, "logps_train/policy_2_w": -179.5662841796875, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.9591221809387207, "rewards_train/1-l": -2.067894458770752, "rewards_train/1-w": 3.004939079284668, "rewards_train/2-2": 2.6689977645874023, "rewards_train/2-w": -2.1484246253967285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.07283353805542, "rewards_train/margins_1": 4.964061260223389, "rewards_train/margins_2": 4.817422389984131, "step": 473 }, { "epoch": 1.42, "logps_train/policy_1_2": -218.75921630859375, "logps_train/policy_1_l": -203.08526611328125, "logps_train/policy_1_w": -143.58616638183594, "logps_train/policy_2_2": -144.12765502929688, "logps_train/policy_2_w": -222.4801788330078, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.3317804336547852, "rewards_train/1-l": -3.3077449798583984, "rewards_train/1-w": 3.982009172439575, "rewards_train/2-2": 3.959890842437744, "rewards_train/2-w": -1.2955763339996338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 7.289754152297974, "rewards_train/margins_1": 5.31378960609436, "rewards_train/margins_2": 5.255467176437378, "step": 473 }, { "epoch": 1.42, "learning_rate": 1.0829668647661558e-06, "loss": 0.6443, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -165.48153686523438, "logps_train/policy_1_l": -185.51046752929688, "logps_train/policy_1_w": -140.3212890625, "logps_train/policy_2_2": -117.38062286376953, "logps_train/policy_2_w": -222.00790405273438, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -0.44502803683280945, "rewards_train/1-l": -2.1504602432250977, "rewards_train/1-w": 2.5776355266571045, "rewards_train/2-2": 2.6033437252044678, "rewards_train/2-w": -2.015047073364258, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.728095769882202, "rewards_train/margins_1": 3.022663563489914, "rewards_train/margins_2": 4.618390798568726, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -156.92617797851562, "logps_train/policy_1_l": -179.59555053710938, "logps_train/policy_1_w": -77.74063873291016, "logps_train/policy_2_2": -90.51486206054688, "logps_train/policy_2_w": -141.010498046875, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": -2.469377040863037, "rewards_train/1-l": -2.6482272148132324, "rewards_train/1-w": 2.3540613651275635, "rewards_train/2-2": 2.2625765800476074, "rewards_train/2-w": -1.9651107788085938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.002288579940796, "rewards_train/margins_1": 4.823438405990601, "rewards_train/margins_2": 4.227687358856201, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -207.70806884765625, "logps_train/policy_1_l": -177.97462463378906, "logps_train/policy_1_w": -114.6461181640625, "logps_train/policy_2_2": -130.67755126953125, "logps_train/policy_2_w": -173.26429748535156, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.850494384765625, "rewards_train/1-l": -2.202735424041748, "rewards_train/1-w": 2.924450397491455, "rewards_train/2-2": 3.367205858230591, "rewards_train/2-w": -1.1825573444366455, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.127185821533203, "rewards_train/margins_1": 4.77494478225708, "rewards_train/margins_2": 4.549763202667236, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -166.29420471191406, "logps_train/policy_1_l": -171.615966796875, "logps_train/policy_1_w": -91.10459899902344, "logps_train/policy_2_2": -97.5522689819336, "logps_train/policy_2_w": -151.9052734375, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -2.3919200897216797, "rewards_train/1-l": -2.820483446121216, "rewards_train/1-w": 2.520301342010498, "rewards_train/2-2": 2.438523292541504, "rewards_train/2-w": -1.3763670921325684, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.340784788131714, "rewards_train/margins_1": 4.912221431732178, "rewards_train/margins_2": 3.8148903846740723, "step": 474 }, { "epoch": 1.42, "logps_train/policy_1_2": -169.83670043945312, "logps_train/policy_1_l": -142.2063446044922, "logps_train/policy_1_w": -97.20448303222656, "logps_train/policy_2_2": -106.07264709472656, "logps_train/policy_2_w": -149.28204345703125, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -1.3844507932662964, "rewards_train/1-l": -1.7292284965515137, "rewards_train/1-w": 2.814316987991333, "rewards_train/2-2": 2.963047981262207, "rewards_train/2-w": -0.7098445892333984, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.543545484542847, "rewards_train/margins_1": 4.198767781257629, "rewards_train/margins_2": 3.6728925704956055, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -227.15423583984375, "logps_train/policy_1_l": -199.79556274414062, "logps_train/policy_1_w": -104.59861755371094, "logps_train/policy_2_2": -142.81829833984375, "logps_train/policy_2_w": -171.7635498046875, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.9372979402542114, "rewards_train/1-l": -2.839968204498291, "rewards_train/1-w": 3.299513339996338, "rewards_train/2-2": 3.731257200241089, "rewards_train/2-w": -0.9622923135757446, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.139481544494629, "rewards_train/margins_1": 5.236811280250549, "rewards_train/margins_2": 4.6935495138168335, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -159.319091796875, "logps_train/policy_1_l": -156.13916015625, "logps_train/policy_1_w": -99.66698455810547, "logps_train/policy_2_2": -105.83871459960938, "logps_train/policy_2_w": -141.59957885742188, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.6822991967201233, "rewards_train/1-l": -2.170850992202759, "rewards_train/1-w": 2.5825209617614746, "rewards_train/2-2": 2.7684712409973145, "rewards_train/2-w": -0.07323814928531647, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.753371953964233, "rewards_train/margins_1": 3.264820158481598, "rewards_train/margins_2": 2.841709390282631, "step": 475 }, { "epoch": 1.42, "logps_train/policy_1_2": -168.21282958984375, "logps_train/policy_1_l": -169.96051025390625, "logps_train/policy_1_w": -97.90248107910156, "logps_train/policy_2_2": -106.23789978027344, "logps_train/policy_2_w": -161.95623779296875, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.4185481071472168, "rewards_train/1-l": -2.6829652786254883, "rewards_train/1-w": 2.691392183303833, "rewards_train/2-2": 2.530702590942383, "rewards_train/2-w": -1.7807793617248535, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.374357461929321, "rewards_train/margins_1": 4.10994029045105, "rewards_train/margins_2": 4.311481952667236, "step": 475 }, { "epoch": 1.43, "learning_rate": 1.0626889439773661e-06, "loss": 0.6328, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -189.903076171875, "logps_train/policy_1_l": -262.6691589355469, "logps_train/policy_1_w": -151.48439025878906, "logps_train/policy_2_2": -127.03848266601562, "logps_train/policy_2_w": -227.24110412597656, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -230.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -0.7223390340805054, "rewards_train/1-l": -3.2307825088500977, "rewards_train/1-w": 3.4332005977630615, "rewards_train/2-2": 3.3172454833984375, "rewards_train/2-w": -1.3053600788116455, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.663983106613159, "rewards_train/margins_1": 4.155539631843567, "rewards_train/margins_2": 4.622605562210083, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -182.44271850585938, "logps_train/policy_1_l": -167.41360473632812, "logps_train/policy_1_w": -136.0765838623047, "logps_train/policy_2_2": -122.51162719726562, "logps_train/policy_2_w": -191.95077514648438, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.2544782161712646, "rewards_train/1-l": -2.1103549003601074, "rewards_train/1-w": 3.462751865386963, "rewards_train/2-2": 2.757772922515869, "rewards_train/2-w": -0.14077983796596527, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.57310676574707, "rewards_train/margins_1": 4.7172300815582275, "rewards_train/margins_2": 2.8985527604818344, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -146.63671875, "logps_train/policy_1_l": -142.32870483398438, "logps_train/policy_1_w": -107.43095397949219, "logps_train/policy_2_2": -97.63526153564453, "logps_train/policy_2_w": -159.0716552734375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.7066402435302734, "rewards_train/1-l": -1.6453713178634644, "rewards_train/1-w": 2.9050488471984863, "rewards_train/2-2": 2.529247283935547, "rewards_train/2-w": -0.7443722486495972, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.550420165061951, "rewards_train/margins_1": 3.6116890907287598, "rewards_train/margins_2": 3.273619532585144, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -182.09664916992188, "logps_train/policy_1_l": -211.10037231445312, "logps_train/policy_1_w": -124.21243286132812, "logps_train/policy_2_2": -121.409912109375, "logps_train/policy_2_w": -185.26173400878906, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.271970510482788, "rewards_train/1-l": -2.861699104309082, "rewards_train/1-w": 2.6689910888671875, "rewards_train/2-2": 2.8324458599090576, "rewards_train/2-w": -1.2046877145767212, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.5306901931762695, "rewards_train/margins_1": 3.9409615993499756, "rewards_train/margins_2": 4.037133574485779, "step": 476 }, { "epoch": 1.43, "logps_train/policy_1_2": -149.5831298828125, "logps_train/policy_1_l": -182.2047576904297, "logps_train/policy_1_w": -83.08172607421875, "logps_train/policy_2_2": -103.1542739868164, "logps_train/policy_2_w": -123.17094421386719, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -104.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": -1.090149998664856, "rewards_train/1-l": -2.682512044906616, "rewards_train/1-w": 2.1292059421539307, "rewards_train/2-2": 2.27246356010437, "rewards_train/2-w": -0.6267621517181396, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.811717987060547, "rewards_train/margins_1": 3.2193559408187866, "rewards_train/margins_2": 2.8992257118225098, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -114.02934265136719, "logps_train/policy_1_l": -97.25277709960938, "logps_train/policy_1_w": -75.30461120605469, "logps_train/policy_2_2": -64.68653106689453, "logps_train/policy_2_w": -117.27509307861328, "logps_train/ref_1_2": -99.5, "logps_train/ref_1_l": -78.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -85.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": -1.450589895248413, "rewards_train/1-l": -1.9370940923690796, "rewards_train/1-w": 1.9604570865631104, "rewards_train/2-2": 2.0217766761779785, "rewards_train/2-w": -0.8502637147903442, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.89755117893219, "rewards_train/margins_1": 3.4110469818115234, "rewards_train/margins_2": 2.8720403909683228, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -168.37405395507812, "logps_train/policy_1_l": -199.00796508789062, "logps_train/policy_1_w": -76.13284301757812, "logps_train/policy_2_2": -101.25223541259766, "logps_train/policy_2_w": -140.38983154296875, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": -2.289943218231201, "rewards_train/1-l": -2.282156229019165, "rewards_train/1-w": 1.9556615352630615, "rewards_train/2-2": 1.9986047744750977, "rewards_train/2-w": -1.8854666948318481, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.237817764282227, "rewards_train/margins_1": 4.245604753494263, "rewards_train/margins_2": 3.884071469306946, "step": 477 }, { "epoch": 1.43, "logps_train/policy_1_2": -193.90444946289062, "logps_train/policy_1_l": -174.22308349609375, "logps_train/policy_1_w": -132.974365234375, "logps_train/policy_2_2": -126.03982543945312, "logps_train/policy_2_w": -201.75491333007812, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.8271633386611938, "rewards_train/1-l": -2.0451598167419434, "rewards_train/1-w": 3.5478758811950684, "rewards_train/2-2": 3.2233614921569824, "rewards_train/2-w": -1.0544947385787964, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.593035697937012, "rewards_train/margins_1": 4.375039219856262, "rewards_train/margins_2": 4.277856230735779, "step": 477 }, { "epoch": 1.43, "learning_rate": 1.0425513022138203e-06, "loss": 0.6247, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -198.795166015625, "logps_train/policy_1_l": -185.73199462890625, "logps_train/policy_1_w": -114.31753540039062, "logps_train/policy_2_2": -113.783935546875, "logps_train/policy_2_w": -191.05715942382812, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -2.338013172149658, "rewards_train/1-l": -2.0204405784606934, "rewards_train/1-w": 3.1516690254211426, "rewards_train/2-2": 3.012524366378784, "rewards_train/2-w": -1.5725129842758179, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.172109603881836, "rewards_train/margins_1": 5.489682197570801, "rewards_train/margins_2": 4.585037350654602, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -130.72344970703125, "logps_train/policy_1_l": -103.504638671875, "logps_train/policy_1_w": -64.31957244873047, "logps_train/policy_2_2": -90.03964233398438, "logps_train/policy_2_w": -105.84133911132812, "logps_train/ref_1_2": -125.5, "logps_train/ref_1_l": -87.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -99.5, "rewards_train/1-2": -0.4860166311264038, "rewards_train/1-l": -1.6730222702026367, "rewards_train/1-w": 2.063648223876953, "rewards_train/2-2": 2.37796950340271, "rewards_train/2-w": -0.6558623909950256, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.73667049407959, "rewards_train/margins_1": 2.549664855003357, "rewards_train/margins_2": 3.0338318943977356, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -252.20518493652344, "logps_train/policy_1_l": -156.98773193359375, "logps_train/policy_1_w": -119.92430877685547, "logps_train/policy_2_2": -161.91024780273438, "logps_train/policy_2_w": -190.77252197265625, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -2.3166117668151855, "rewards_train/1-l": -1.8557078838348389, "rewards_train/1-w": 3.509131908416748, "rewards_train/2-2": 4.181632041931152, "rewards_train/2-w": -1.1280338764190674, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.364839792251587, "rewards_train/margins_1": 5.825743675231934, "rewards_train/margins_2": 5.30966591835022, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -233.485107421875, "logps_train/policy_1_l": -212.75238037109375, "logps_train/policy_1_w": -152.5297393798828, "logps_train/policy_2_2": -149.65972900390625, "logps_train/policy_2_w": -229.41162109375, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -1.9708251953125, "rewards_train/1-l": -2.013716459274292, "rewards_train/1-w": 3.313042163848877, "rewards_train/2-2": 3.74808931350708, "rewards_train/2-w": -1.8659662008285522, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.326758623123169, "rewards_train/margins_1": 5.283867359161377, "rewards_train/margins_2": 5.614055514335632, "step": 478 }, { "epoch": 1.43, "logps_train/policy_1_2": -183.92169189453125, "logps_train/policy_1_l": -289.85186767578125, "logps_train/policy_1_w": -131.09014892578125, "logps_train/policy_2_2": -118.31422424316406, "logps_train/policy_2_w": -208.29934692382812, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -251.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.1570125818252563, "rewards_train/1-l": -3.9463672637939453, "rewards_train/1-w": 3.1085641384124756, "rewards_train/2-2": 3.1763901710510254, "rewards_train/2-w": -1.531886339187622, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.054931402206421, "rewards_train/margins_1": 4.265576720237732, "rewards_train/margins_2": 4.7082765102386475, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -161.2650146484375, "logps_train/policy_1_l": -204.86505126953125, "logps_train/policy_1_w": -108.73515319824219, "logps_train/policy_2_2": -92.33231353759766, "logps_train/policy_2_w": -187.51776123046875, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.7784537076950073, "rewards_train/1-l": -2.854865074157715, "rewards_train/1-w": 2.8647665977478027, "rewards_train/2-2": 2.7965049743652344, "rewards_train/2-w": -1.954315423965454, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.719631671905518, "rewards_train/margins_1": 4.64322030544281, "rewards_train/margins_2": 4.7508203983306885, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -183.79605102539062, "logps_train/policy_1_l": -150.99288940429688, "logps_train/policy_1_w": -99.58177185058594, "logps_train/policy_2_2": -123.65129089355469, "logps_train/policy_2_w": -158.9112548828125, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -1.202262282371521, "rewards_train/1-l": -2.004952907562256, "rewards_train/1-w": 2.827857494354248, "rewards_train/2-2": 2.8961997032165527, "rewards_train/2-w": -0.9531850814819336, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.832810401916504, "rewards_train/margins_1": 4.030119776725769, "rewards_train/margins_2": 3.8493847846984863, "step": 479 }, { "epoch": 1.43, "logps_train/policy_1_2": -199.07147216796875, "logps_train/policy_1_l": -179.24403381347656, "logps_train/policy_1_w": -151.42373657226562, "logps_train/policy_2_2": -114.27288055419922, "logps_train/policy_2_w": -241.8135986328125, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -2.323554277420044, "rewards_train/1-l": -2.360975980758667, "rewards_train/1-w": 3.7287204265594482, "rewards_train/2-2": 2.8459548950195312, "rewards_train/2-w": -2.007727861404419, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.089696407318115, "rewards_train/margins_1": 6.052274703979492, "rewards_train/margins_2": 4.85368275642395, "step": 479 }, { "epoch": 1.44, "learning_rate": 1.0225559048738548e-06, "loss": 0.58, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -268.0580139160156, "logps_train/policy_1_l": -186.50119018554688, "logps_train/policy_1_w": -145.06088256835938, "logps_train/policy_2_2": -164.61129760742188, "logps_train/policy_2_w": -228.0025634765625, "logps_train/ref_1_2": -240.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -2.721426010131836, "rewards_train/1-l": -2.5416419506073, "rewards_train/1-w": 4.017233371734619, "rewards_train/2-2": 4.060745716094971, "rewards_train/2-w": -1.5119496583938599, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.558875322341919, "rewards_train/margins_1": 6.738659381866455, "rewards_train/margins_2": 5.572695374488831, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -255.4586944580078, "logps_train/policy_1_l": -224.0749969482422, "logps_train/policy_1_w": -120.4638671875, "logps_train/policy_2_2": -174.59922790527344, "logps_train/policy_2_w": -176.65562438964844, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.448212742805481, "rewards_train/1-l": -3.1729788780212402, "rewards_train/1-w": 2.8028321266174316, "rewards_train/2-2": 3.921327590942383, "rewards_train/2-w": -0.690953254699707, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.975811004638672, "rewards_train/margins_1": 4.251044869422913, "rewards_train/margins_2": 4.61228084564209, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -176.65513610839844, "logps_train/policy_1_l": -118.1304931640625, "logps_train/policy_1_w": -117.8966064453125, "logps_train/policy_2_2": -109.35474395751953, "logps_train/policy_2_w": -200.52447509765625, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.58387291431427, "rewards_train/1-l": -2.377502679824829, "rewards_train/1-w": 3.449596881866455, "rewards_train/2-2": 3.106517791748047, "rewards_train/2-w": -2.2989325523376465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.827099561691284, "rewards_train/margins_1": 5.033469796180725, "rewards_train/margins_2": 5.405450344085693, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -222.32855224609375, "logps_train/policy_1_l": -236.88572692871094, "logps_train/policy_1_w": -123.8101806640625, "logps_train/policy_2_2": -137.01992797851562, "logps_train/policy_2_w": -210.5487060546875, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -2.4921321868896484, "rewards_train/1-l": -3.179490327835083, "rewards_train/1-w": 2.667712688446045, "rewards_train/2-2": 3.0824804306030273, "rewards_train/2-w": -2.4689342975616455, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.847203016281128, "rewards_train/margins_1": 5.159844875335693, "rewards_train/margins_2": 5.551414728164673, "step": 480 }, { "epoch": 1.44, "logps_train/policy_1_2": -163.60369873046875, "logps_train/policy_1_l": -176.72589111328125, "logps_train/policy_1_w": -94.15080261230469, "logps_train/policy_2_2": -102.04846954345703, "logps_train/policy_2_w": -139.71939086914062, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -0.8537309169769287, "rewards_train/1-l": -2.593097686767578, "rewards_train/1-w": 2.316951274871826, "rewards_train/2-2": 3.222106695175171, "rewards_train/2-w": -0.6867833137512207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.910048961639404, "rewards_train/margins_1": 3.170682191848755, "rewards_train/margins_2": 3.9088900089263916, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -136.21676635742188, "logps_train/policy_1_l": -108.58782958984375, "logps_train/policy_1_w": -81.82534790039062, "logps_train/policy_2_2": -74.14346313476562, "logps_train/policy_2_w": -161.686767578125, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -1.6177701950073242, "rewards_train/1-l": -1.5585147142410278, "rewards_train/1-w": 2.592855930328369, "rewards_train/2-2": 2.2688565254211426, "rewards_train/2-w": -2.63381290435791, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.151370644569397, "rewards_train/margins_1": 4.210626125335693, "rewards_train/margins_2": 4.902669429779053, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -170.37644958496094, "logps_train/policy_1_l": -122.62095642089844, "logps_train/policy_1_w": -89.77519226074219, "logps_train/policy_2_2": -108.4974594116211, "logps_train/policy_2_w": -139.95004272460938, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -1.8091294765472412, "rewards_train/1-l": -1.8327019214630127, "rewards_train/1-w": 2.449824094772339, "rewards_train/2-2": 2.7313084602355957, "rewards_train/2-w": -1.4231281280517578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.282526016235352, "rewards_train/margins_1": 4.25895357131958, "rewards_train/margins_2": 4.1544365882873535, "step": 481 }, { "epoch": 1.44, "logps_train/policy_1_2": -163.19378662109375, "logps_train/policy_1_l": -137.81240844726562, "logps_train/policy_1_w": -110.6466293334961, "logps_train/policy_2_2": -94.26313781738281, "logps_train/policy_2_w": -170.72457885742188, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -2.2064881324768066, "rewards_train/1-l": -2.1960840225219727, "rewards_train/1-w": 2.5510597229003906, "rewards_train/2-2": 2.558061122894287, "rewards_train/2-w": -1.6066381931304932, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.747143745422363, "rewards_train/margins_1": 4.757547855377197, "rewards_train/margins_2": 4.16469931602478, "step": 481 }, { "epoch": 1.44, "learning_rate": 1.002704703473e-06, "loss": 0.5461, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -156.35836791992188, "logps_train/policy_1_l": -142.1297149658203, "logps_train/policy_1_w": -105.29339599609375, "logps_train/policy_2_2": -105.09834289550781, "logps_train/policy_2_w": -159.18341064453125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.1634725332260132, "rewards_train/1-l": -1.7881176471710205, "rewards_train/1-w": 2.5111875534057617, "rewards_train/2-2": 2.4428277015686035, "rewards_train/2-w": -1.2158749103546143, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.299305200576782, "rewards_train/margins_1": 3.674660086631775, "rewards_train/margins_2": 3.6587026119232178, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -189.2758026123047, "logps_train/policy_1_l": -187.35891723632812, "logps_train/policy_1_w": -118.14601135253906, "logps_train/policy_2_2": -111.52301025390625, "logps_train/policy_2_w": -197.952880859375, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -2.547111988067627, "rewards_train/1-l": -2.2300312519073486, "rewards_train/1-w": 2.9020004272460938, "rewards_train/2-2": 2.677581548690796, "rewards_train/2-w": -2.295090913772583, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.132031679153442, "rewards_train/margins_1": 5.449112415313721, "rewards_train/margins_2": 4.972672462463379, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -227.3583984375, "logps_train/policy_1_l": -240.79116821289062, "logps_train/policy_1_w": -140.45236206054688, "logps_train/policy_2_2": -152.29269409179688, "logps_train/policy_2_w": -214.87484741210938, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": -1.5940430164337158, "rewards_train/1-l": -3.3154444694519043, "rewards_train/1-w": 3.3051533699035645, "rewards_train/2-2": 3.598074197769165, "rewards_train/2-w": -1.396860957145691, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.620597839355469, "rewards_train/margins_1": 4.89919638633728, "rewards_train/margins_2": 4.994935154914856, "step": 482 }, { "epoch": 1.44, "logps_train/policy_1_2": -172.8372802734375, "logps_train/policy_1_l": -188.60421752929688, "logps_train/policy_1_w": -119.14328002929688, "logps_train/policy_2_2": -121.98787689208984, "logps_train/policy_2_w": -198.24073791503906, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.0016961097717285, "rewards_train/1-l": -2.598361015319824, "rewards_train/1-w": 3.23425555229187, "rewards_train/2-2": 2.347306251525879, "rewards_train/2-w": -1.9985363483428955, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.832616567611694, "rewards_train/margins_1": 4.235951662063599, "rewards_train/margins_2": 4.345842599868774, "step": 482 }, { "epoch": 1.45, "logps_train/policy_1_2": -203.4771728515625, "logps_train/policy_1_l": -224.47393798828125, "logps_train/policy_1_w": -146.39064025878906, "logps_train/policy_2_2": -129.53607177734375, "logps_train/policy_2_w": -240.66287231445312, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -197.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.5945905447006226, "rewards_train/1-l": -2.756281614303589, "rewards_train/1-w": 3.4052722454071045, "rewards_train/2-2": 3.2253975868225098, "rewards_train/2-w": -2.3209757804870605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.161553859710693, "rewards_train/margins_1": 4.999862790107727, "rewards_train/margins_2": 5.54637336730957, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -176.60556030273438, "logps_train/policy_1_l": -165.2497100830078, "logps_train/policy_1_w": -147.37599182128906, "logps_train/policy_2_2": -108.38548278808594, "logps_train/policy_2_w": -218.05157470703125, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.5782309770584106, "rewards_train/1-l": -2.1405956745147705, "rewards_train/1-w": 2.794529438018799, "rewards_train/2-2": 2.702662467956543, "rewards_train/2-w": -2.0282034873962402, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.935125112533569, "rewards_train/margins_1": 4.3727604150772095, "rewards_train/margins_2": 4.730865955352783, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -191.2767791748047, "logps_train/policy_1_l": -188.40121459960938, "logps_train/policy_1_w": -112.24864959716797, "logps_train/policy_2_2": -122.9285888671875, "logps_train/policy_2_w": -188.17385864257812, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.7720133066177368, "rewards_train/1-l": -2.7692227363586426, "rewards_train/1-w": 3.060145139694214, "rewards_train/2-2": 2.731750726699829, "rewards_train/2-w": -1.9675817489624023, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.8293678760528564, "rewards_train/margins_1": 4.832158446311951, "rewards_train/margins_2": 4.6993324756622314, "step": 483 }, { "epoch": 1.45, "logps_train/policy_1_2": -239.1802978515625, "logps_train/policy_1_l": -207.10733032226562, "logps_train/policy_1_w": -137.34063720703125, "logps_train/policy_2_2": -160.6358184814453, "logps_train/policy_2_w": -218.99363708496094, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.4586548805236816, "rewards_train/1-l": -2.372159004211426, "rewards_train/1-w": 3.8940606117248535, "rewards_train/2-2": 3.9102468490600586, "rewards_train/2-w": -1.6032700538635254, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.266219615936279, "rewards_train/margins_1": 5.352715492248535, "rewards_train/margins_2": 5.513516902923584, "step": 483 }, { "epoch": 1.45, "learning_rate": 9.829996354535173e-07, "loss": 0.5032, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -166.5821533203125, "logps_train/policy_1_l": -191.74595642089844, "logps_train/policy_1_w": -122.7481460571289, "logps_train/policy_2_2": -113.27088928222656, "logps_train/policy_2_w": -207.32931518554688, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -0.997669517993927, "rewards_train/1-l": -2.4757182598114014, "rewards_train/1-w": 3.042958974838257, "rewards_train/2-2": 2.5135364532470703, "rewards_train/2-w": -1.7032452821731567, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.518677234649658, "rewards_train/margins_1": 4.040628492832184, "rewards_train/margins_2": 4.216781735420227, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -214.6346435546875, "logps_train/policy_1_l": -168.80978393554688, "logps_train/policy_1_w": -115.85064697265625, "logps_train/policy_2_2": -131.73956298828125, "logps_train/policy_2_w": -194.99822998046875, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -2.5142464637756348, "rewards_train/1-l": -2.430989980697632, "rewards_train/1-w": 3.2782411575317383, "rewards_train/2-2": 3.176433563232422, "rewards_train/2-w": -1.934978723526001, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.70923113822937, "rewards_train/margins_1": 5.792487621307373, "rewards_train/margins_2": 5.111412286758423, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -236.0010986328125, "logps_train/policy_1_l": -237.49288940429688, "logps_train/policy_1_w": -138.82366943359375, "logps_train/policy_2_2": -141.211669921875, "logps_train/policy_2_w": -228.63665771484375, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -2.707141399383545, "rewards_train/1-l": -3.265305280685425, "rewards_train/1-w": 3.0289626121520996, "rewards_train/2-2": 3.2116456031799316, "rewards_train/2-w": -2.4203076362609863, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.294267892837524, "rewards_train/margins_1": 5.7361040115356445, "rewards_train/margins_2": 5.631953239440918, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -177.87478637695312, "logps_train/policy_1_l": -180.7327117919922, "logps_train/policy_1_w": -111.18177795410156, "logps_train/policy_2_2": -111.50605010986328, "logps_train/policy_2_w": -179.95974731445312, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.4611119031906128, "rewards_train/1-l": -3.132145881652832, "rewards_train/1-w": 3.6617541313171387, "rewards_train/2-2": 3.097637176513672, "rewards_train/2-w": -0.817947506904602, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.793900012969971, "rewards_train/margins_1": 5.1228660345077515, "rewards_train/margins_2": 3.915584683418274, "step": 484 }, { "epoch": 1.45, "logps_train/policy_1_2": -165.34310913085938, "logps_train/policy_1_l": -160.2210235595703, "logps_train/policy_1_w": -128.24481201171875, "logps_train/policy_2_2": -109.55049896240234, "logps_train/policy_2_w": -209.7982177734375, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -1.1552103757858276, "rewards_train/1-l": -2.819758415222168, "rewards_train/1-w": 3.207550525665283, "rewards_train/2-2": 2.821316957473755, "rewards_train/2-w": -2.110291004180908, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.027308940887451, "rewards_train/margins_1": 4.362760901451111, "rewards_train/margins_2": 4.931607961654663, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -215.10000610351562, "logps_train/policy_1_l": -168.26950073242188, "logps_train/policy_1_w": -145.46029663085938, "logps_train/policy_2_2": -143.29470825195312, "logps_train/policy_2_w": -230.6566619873047, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -2.2692768573760986, "rewards_train/1-l": -2.1842994689941406, "rewards_train/1-w": 3.477993965148926, "rewards_train/2-2": 3.06554913520813, "rewards_train/2-w": -2.0141043663024902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.662293434143066, "rewards_train/margins_1": 5.747270822525024, "rewards_train/margins_2": 5.07965350151062, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -179.287109375, "logps_train/policy_1_l": -194.55877685546875, "logps_train/policy_1_w": -148.37196350097656, "logps_train/policy_2_2": -114.95892333984375, "logps_train/policy_2_w": -217.7252197265625, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.5761711597442627, "rewards_train/1-l": -2.8448169231414795, "rewards_train/1-w": 3.1100685596466064, "rewards_train/2-2": 3.0445375442504883, "rewards_train/2-w": -1.803771734237671, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.954885482788086, "rewards_train/margins_1": 4.686239719390869, "rewards_train/margins_2": 4.848309278488159, "step": 485 }, { "epoch": 1.45, "logps_train/policy_1_2": -177.0806884765625, "logps_train/policy_1_l": -220.41104125976562, "logps_train/policy_1_w": -96.54922485351562, "logps_train/policy_2_2": -99.395263671875, "logps_train/policy_2_w": -161.99537658691406, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -2.497130870819092, "rewards_train/1-l": -3.235536575317383, "rewards_train/1-w": 2.449570655822754, "rewards_train/2-2": 2.9057865142822266, "rewards_train/2-w": -1.7997334003448486, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.685107231140137, "rewards_train/margins_1": 4.946701526641846, "rewards_train/margins_2": 4.705519914627075, "step": 485 }, { "epoch": 1.46, "learning_rate": 9.634426239953074e-07, "loss": 0.4605, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -219.45486450195312, "logps_train/policy_1_l": -218.16249084472656, "logps_train/policy_1_w": -117.30770874023438, "logps_train/policy_2_2": -154.78076171875, "logps_train/policy_2_w": -190.07577514648438, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -1.4872825145721436, "rewards_train/1-l": -3.024549961090088, "rewards_train/1-w": 3.7580959796905518, "rewards_train/2-2": 2.626415491104126, "rewards_train/2-w": -0.9048439264297485, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 6.78264594078064, "rewards_train/margins_1": 5.245378494262695, "rewards_train/margins_2": 3.5312594175338745, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -205.4872283935547, "logps_train/policy_1_l": -163.02377319335938, "logps_train/policy_1_w": -123.93510437011719, "logps_train/policy_2_2": -120.33120727539062, "logps_train/policy_2_w": -204.88998413085938, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -2.8088788986206055, "rewards_train/1-l": -1.631148099899292, "rewards_train/1-w": 3.0211386680603027, "rewards_train/2-2": 2.8809423446655273, "rewards_train/2-w": -2.4856762886047363, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.652286767959595, "rewards_train/margins_1": 5.830017566680908, "rewards_train/margins_2": 5.366618633270264, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -234.31109619140625, "logps_train/policy_1_l": -225.29574584960938, "logps_train/policy_1_w": -140.68814086914062, "logps_train/policy_2_2": -149.4952392578125, "logps_train/policy_2_w": -223.67877197265625, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -2.004936933517456, "rewards_train/1-l": -2.741683006286621, "rewards_train/1-w": 3.36541485786438, "rewards_train/2-2": 3.455211639404297, "rewards_train/2-w": -1.625251293182373, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.107097864151001, "rewards_train/margins_1": 5.370351791381836, "rewards_train/margins_2": 5.08046293258667, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -225.61929321289062, "logps_train/policy_1_l": -201.0338134765625, "logps_train/policy_1_w": -124.53641510009766, "logps_train/policy_2_2": -142.2296905517578, "logps_train/policy_2_w": -194.9163055419922, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -2.193959951400757, "rewards_train/1-l": -2.4720332622528076, "rewards_train/1-w": 2.8794636726379395, "rewards_train/2-2": 3.1793739795684814, "rewards_train/2-w": -1.253642201423645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.351496934890747, "rewards_train/margins_1": 5.073423624038696, "rewards_train/margins_2": 4.4330161809921265, "step": 486 }, { "epoch": 1.46, "logps_train/policy_1_2": -179.02554321289062, "logps_train/policy_1_l": -153.89599609375, "logps_train/policy_1_w": -136.08526611328125, "logps_train/policy_2_2": -101.98130798339844, "logps_train/policy_2_w": -217.23309326171875, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -1.8841959238052368, "rewards_train/1-l": -1.9170409440994263, "rewards_train/1-w": 3.4127626419067383, "rewards_train/2-2": 2.754603624343872, "rewards_train/2-w": -1.8213553428649902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.3298035860061646, "rewards_train/margins_1": 5.296958565711975, "rewards_train/margins_2": 4.575958967208862, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -204.2353515625, "logps_train/policy_1_l": -221.271484375, "logps_train/policy_1_w": -119.87545776367188, "logps_train/policy_2_2": -128.81033325195312, "logps_train/policy_2_w": -198.8939208984375, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -2.0603508949279785, "rewards_train/1-l": -2.8394522666931152, "rewards_train/1-w": 2.780813217163086, "rewards_train/2-2": 2.779025077819824, "rewards_train/2-w": -1.8925163745880127, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.620265483856201, "rewards_train/margins_1": 4.8411641120910645, "rewards_train/margins_2": 4.671541452407837, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -230.72659301757812, "logps_train/policy_1_l": -192.26461791992188, "logps_train/policy_1_w": -115.55438995361328, "logps_train/policy_2_2": -149.19406127929688, "logps_train/policy_2_w": -177.63400268554688, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.8882827758789062, "rewards_train/1-l": -1.86806321144104, "rewards_train/1-w": 3.109013557434082, "rewards_train/2-2": 3.49153208732605, "rewards_train/2-w": -1.0137903690338135, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.977076768875122, "rewards_train/margins_1": 4.997296333312988, "rewards_train/margins_2": 4.505322456359863, "step": 487 }, { "epoch": 1.46, "logps_train/policy_1_2": -182.64678955078125, "logps_train/policy_1_l": -140.459716796875, "logps_train/policy_1_w": -87.14222717285156, "logps_train/policy_2_2": -119.68535614013672, "logps_train/policy_2_w": -145.8587188720703, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -1.748661994934082, "rewards_train/1-l": -1.789428472518921, "rewards_train/1-w": 2.5818710327148438, "rewards_train/2-2": 2.777362823486328, "rewards_train/2-w": -1.4675121307373047, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.371299505233765, "rewards_train/margins_1": 4.330533027648926, "rewards_train/margins_2": 4.244874954223633, "step": 487 }, { "epoch": 1.46, "learning_rate": 9.440355778282107e-07, "loss": 0.5509, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -227.01315307617188, "logps_train/policy_1_l": -193.791015625, "logps_train/policy_1_w": -104.33826446533203, "logps_train/policy_2_2": -142.35531616210938, "logps_train/policy_2_w": -194.29901123046875, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -2.5222136974334717, "rewards_train/1-l": -2.627441167831421, "rewards_train/1-w": 3.035362720489502, "rewards_train/2-2": 3.224917411804199, "rewards_train/2-w": -2.603339433670044, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.662803888320923, "rewards_train/margins_1": 5.557576417922974, "rewards_train/margins_2": 5.828256845474243, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -161.60418701171875, "logps_train/policy_1_l": -147.77532958984375, "logps_train/policy_1_w": -123.17131042480469, "logps_train/policy_2_2": -108.61065673828125, "logps_train/policy_2_w": -171.66433715820312, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -0.7102223038673401, "rewards_train/1-l": -1.7847716808319092, "rewards_train/1-w": 2.3572702407836914, "rewards_train/2-2": 2.8147153854370117, "rewards_train/2-w": -0.8070590496063232, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.142041921615601, "rewards_train/margins_1": 3.0674925446510315, "rewards_train/margins_2": 3.621774435043335, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -240.122802734375, "logps_train/policy_1_l": -185.9749298095703, "logps_train/policy_1_w": -138.98822021484375, "logps_train/policy_2_2": -167.445556640625, "logps_train/policy_2_w": -222.8914031982422, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.4895260334014893, "rewards_train/1-l": -2.0353829860687256, "rewards_train/1-w": 3.193267583847046, "rewards_train/2-2": 3.0818111896514893, "rewards_train/2-w": -2.05378794670105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.2286505699157715, "rewards_train/margins_1": 4.682793617248535, "rewards_train/margins_2": 5.135599136352539, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -181.38478088378906, "logps_train/policy_1_l": -145.91732788085938, "logps_train/policy_1_w": -100.82420349121094, "logps_train/policy_2_2": -113.14958953857422, "logps_train/policy_2_w": -180.31716918945312, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -2.1636734008789062, "rewards_train/1-l": -2.5664384365081787, "rewards_train/1-w": 2.9732437133789062, "rewards_train/2-2": 2.589728593826294, "rewards_train/2-w": -2.138357400894165, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.539682149887085, "rewards_train/margins_1": 5.1369171142578125, "rewards_train/margins_2": 4.728085994720459, "step": 488 }, { "epoch": 1.46, "logps_train/policy_1_2": -218.76004028320312, "logps_train/policy_1_l": -178.6398162841797, "logps_train/policy_1_w": -147.80628967285156, "logps_train/policy_2_2": -140.951171875, "logps_train/policy_2_w": -227.08604431152344, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.7662389278411865, "rewards_train/1-l": -2.8844897747039795, "rewards_train/1-w": 3.112339973449707, "rewards_train/2-2": 3.4509756565093994, "rewards_train/2-w": -1.8609482049942017, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.9968297481536865, "rewards_train/margins_1": 4.8785789012908936, "rewards_train/margins_2": 5.311923861503601, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -211.60800170898438, "logps_train/policy_1_l": -166.69664001464844, "logps_train/policy_1_w": -141.897216796875, "logps_train/policy_2_2": -138.61680603027344, "logps_train/policy_2_w": -230.33987426757812, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.9510338306427002, "rewards_train/1-l": -2.419957160949707, "rewards_train/1-w": 3.4780282974243164, "rewards_train/2-2": 2.8149681091308594, "rewards_train/2-w": -2.0687527656555176, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.897985458374023, "rewards_train/margins_1": 5.429062128067017, "rewards_train/margins_2": 4.883720874786377, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -167.1319580078125, "logps_train/policy_1_l": -155.80735778808594, "logps_train/policy_1_w": -126.77120971679688, "logps_train/policy_2_2": -113.59283447265625, "logps_train/policy_2_w": -201.96519470214844, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.0290162563323975, "rewards_train/1-l": -1.524876356124878, "rewards_train/1-w": 3.189676284790039, "rewards_train/2-2": 2.5153255462646484, "rewards_train/2-w": -1.7726914882659912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.714552640914917, "rewards_train/margins_1": 4.2186925411224365, "rewards_train/margins_2": 4.28801703453064, "step": 489 }, { "epoch": 1.46, "logps_train/policy_1_2": -176.18499755859375, "logps_train/policy_1_l": -165.7349853515625, "logps_train/policy_1_w": -124.50920104980469, "logps_train/policy_2_2": -114.43379211425781, "logps_train/policy_2_w": -200.41400146484375, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.5059990882873535, "rewards_train/1-l": -2.5234999656677246, "rewards_train/1-w": 3.0440011024475098, "rewards_train/2-2": 2.604276657104492, "rewards_train/2-w": -2.0445244312286377, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.567501068115234, "rewards_train/margins_1": 4.550000190734863, "rewards_train/margins_2": 4.64880108833313, "step": 489 }, { "epoch": 1.47, "learning_rate": 9.247803910457226e-07, "loss": 0.5389, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -109.60610961914062, "logps_train/policy_1_l": -136.61309814453125, "logps_train/policy_1_w": -93.33940887451172, "logps_train/policy_2_2": -66.55351257324219, "logps_train/policy_2_w": -140.83255004882812, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -89.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -0.5430326461791992, "rewards_train/1-l": -1.509552240371704, "rewards_train/1-w": 2.646918773651123, "rewards_train/2-2": 2.2493367195129395, "rewards_train/2-w": -0.6793496012687683, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.156471014022827, "rewards_train/margins_1": 3.1899514198303223, "rewards_train/margins_2": 2.9286863207817078, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -159.6281280517578, "logps_train/policy_1_l": -155.2849578857422, "logps_train/policy_1_w": -127.33116912841797, "logps_train/policy_2_2": -97.65275573730469, "logps_train/policy_2_w": -192.06240844726562, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.687422275543213, "rewards_train/1-l": -2.7709760665893555, "rewards_train/1-w": 2.629187822341919, "rewards_train/2-2": 2.503474473953247, "rewards_train/2-w": -1.5660057067871094, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.400163888931274, "rewards_train/margins_1": 4.316610097885132, "rewards_train/margins_2": 4.0694801807403564, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -216.0855712890625, "logps_train/policy_1_l": -182.247802734375, "logps_train/policy_1_w": -148.47601318359375, "logps_train/policy_2_2": -134.7963409423828, "logps_train/policy_2_w": -244.36044311523438, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -2.2294697761535645, "rewards_train/1-l": -2.2257721424102783, "rewards_train/1-w": 3.296539783477783, "rewards_train/2-2": 2.761277198791504, "rewards_train/2-w": -2.445028781890869, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.5223119258880615, "rewards_train/margins_1": 5.526009559631348, "rewards_train/margins_2": 5.206305980682373, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -222.74395751953125, "logps_train/policy_1_l": -182.16998291015625, "logps_train/policy_1_w": -133.44338989257812, "logps_train/policy_2_2": -145.0277099609375, "logps_train/policy_2_w": -209.82553100585938, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.731034755706787, "rewards_train/1-l": -2.972259044647217, "rewards_train/1-w": 2.907613754272461, "rewards_train/2-2": 3.5694947242736816, "rewards_train/2-w": -1.6630228757858276, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.879872798919678, "rewards_train/margins_1": 4.638648509979248, "rewards_train/margins_2": 5.232517600059509, "step": 490 }, { "epoch": 1.47, "logps_train/policy_1_2": -199.00375366210938, "logps_train/policy_1_l": -260.98187255859375, "logps_train/policy_1_w": -151.57138061523438, "logps_train/policy_2_2": -131.8411102294922, "logps_train/policy_2_w": -240.17599487304688, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -228.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -1.5238120555877686, "rewards_train/1-l": -3.2272632122039795, "rewards_train/1-w": 3.6842682361602783, "rewards_train/2-2": 3.090498924255371, "rewards_train/2-w": -2.0801002979278564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.911531448364258, "rewards_train/margins_1": 5.208080291748047, "rewards_train/margins_2": 5.1705992221832275, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -133.24969482421875, "logps_train/policy_1_l": -149.25921630859375, "logps_train/policy_1_w": -101.27523040771484, "logps_train/policy_2_2": -86.15980529785156, "logps_train/policy_2_w": -154.62974548339844, "logps_train/ref_1_2": -122.5, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -1.088446021080017, "rewards_train/1-l": -2.346038341522217, "rewards_train/1-w": 1.9466955661773682, "rewards_train/2-2": 2.265073776245117, "rewards_train/2-w": -1.5616074800491333, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.292733907699585, "rewards_train/margins_1": 3.0351415872573853, "rewards_train/margins_2": 3.8266812562942505, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -179.9951934814453, "logps_train/policy_1_l": -123.85643005371094, "logps_train/policy_1_w": -111.65977478027344, "logps_train/policy_2_2": -112.17674255371094, "logps_train/policy_2_w": -176.7251739501953, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.9963939189910889, "rewards_train/1-l": -2.1413064002990723, "rewards_train/1-w": 2.4732799530029297, "rewards_train/2-2": 2.4190444946289062, "rewards_train/2-w": -1.7156816720962524, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.614586353302002, "rewards_train/margins_1": 4.4696738719940186, "rewards_train/margins_2": 4.134726166725159, "step": 491 }, { "epoch": 1.47, "logps_train/policy_1_2": -158.78512573242188, "logps_train/policy_1_l": -104.12977600097656, "logps_train/policy_1_w": -74.46331024169922, "logps_train/policy_2_2": -97.22177124023438, "logps_train/policy_2_w": -138.75831604003906, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -84.5, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": -1.9839808940887451, "rewards_train/1-l": -1.9921035766601562, "rewards_train/1-w": 2.1540589332580566, "rewards_train/2-2": 2.2536046504974365, "rewards_train/2-w": -2.299757957458496, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.146162509918213, "rewards_train/margins_1": 4.138039827346802, "rewards_train/margins_2": 4.553362607955933, "step": 491 }, { "epoch": 1.47, "learning_rate": 9.05678942920127e-07, "loss": 0.7553, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -181.78097534179688, "logps_train/policy_1_l": -188.94769287109375, "logps_train/policy_1_w": -130.49356079101562, "logps_train/policy_2_2": -126.82746887207031, "logps_train/policy_2_w": -192.51364135742188, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -0.5683313012123108, "rewards_train/1-l": -1.5210380554199219, "rewards_train/1-w": 3.3436121940612793, "rewards_train/2-2": 2.9516279697418213, "rewards_train/2-w": -0.5415985584259033, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.864650249481201, "rewards_train/margins_1": 3.91194349527359, "rewards_train/margins_2": 3.4932265281677246, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -218.10171508789062, "logps_train/policy_1_l": -199.0816650390625, "logps_train/policy_1_w": -169.3673095703125, "logps_train/policy_2_2": -151.89285278320312, "logps_train/policy_2_w": -242.88394165039062, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": -0.8664226531982422, "rewards_train/1-l": -2.754845142364502, "rewards_train/1-w": 3.798424482345581, "rewards_train/2-2": 3.5529022216796875, "rewards_train/2-w": -1.4860508441925049, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.553269624710083, "rewards_train/margins_1": 4.664847135543823, "rewards_train/margins_2": 5.038953065872192, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -170.56614685058594, "logps_train/policy_1_l": -160.53253173828125, "logps_train/policy_1_w": -99.47586059570312, "logps_train/policy_2_2": -113.2070541381836, "logps_train/policy_2_w": -160.19808959960938, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.1339585781097412, "rewards_train/1-l": -2.344172477722168, "rewards_train/1-w": 2.7331762313842773, "rewards_train/2-2": 2.5871076583862305, "rewards_train/2-w": -1.2069180011749268, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.077348709106445, "rewards_train/margins_1": 3.8671348094940186, "rewards_train/margins_2": 3.7940256595611572, "step": 492 }, { "epoch": 1.47, "logps_train/policy_1_2": -176.05868530273438, "logps_train/policy_1_l": -170.2769775390625, "logps_train/policy_1_w": -100.54557037353516, "logps_train/policy_2_2": -125.01124572753906, "logps_train/policy_2_w": -148.26181030273438, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -0.46524369716644287, "rewards_train/1-l": -2.523888349533081, "rewards_train/1-w": 2.7502281665802, "rewards_train/2-2": 2.9887194633483887, "rewards_train/2-w": -0.28340694308280945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.274116516113281, "rewards_train/margins_1": 3.215471863746643, "rewards_train/margins_2": 3.272126406431198, "step": 492 }, { "epoch": 1.48, "logps_train/policy_1_2": -223.17483520507812, "logps_train/policy_1_l": -176.36929321289062, "logps_train/policy_1_w": -104.27819061279297, "logps_train/policy_2_2": -137.52703857421875, "logps_train/policy_2_w": -184.5853729248047, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -2.5612335205078125, "rewards_train/1-l": -1.8705962896347046, "rewards_train/1-w": 2.912024974822998, "rewards_train/2-2": 2.8512024879455566, "rewards_train/2-w": -1.5483801364898682, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.782621264457703, "rewards_train/margins_1": 5.4732584953308105, "rewards_train/margins_2": 4.399582624435425, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -168.8197021484375, "logps_train/policy_1_l": -173.8885498046875, "logps_train/policy_1_w": -86.39282989501953, "logps_train/policy_2_2": -93.0218505859375, "logps_train/policy_2_w": -146.157470703125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -2.3453497886657715, "rewards_train/1-l": -1.8647947311401367, "rewards_train/1-w": 2.36306095123291, "rewards_train/2-2": 2.6388304233551025, "rewards_train/2-w": -1.2899665832519531, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.227855682373047, "rewards_train/margins_1": 4.708410739898682, "rewards_train/margins_2": 3.9287970066070557, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -242.31298828125, "logps_train/policy_1_l": -164.72573852539062, "logps_train/policy_1_w": -93.11880493164062, "logps_train/policy_2_2": -165.54244995117188, "logps_train/policy_2_w": -157.13314819335938, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -2.1273927688598633, "rewards_train/1-l": -2.5346827507019043, "rewards_train/1-w": 2.588510036468506, "rewards_train/2-2": 3.008133888244629, "rewards_train/2-w": -1.6875332593917847, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.12319278717041, "rewards_train/margins_1": 4.715902805328369, "rewards_train/margins_2": 4.695667147636414, "step": 493 }, { "epoch": 1.48, "logps_train/policy_1_2": -116.58234405517578, "logps_train/policy_1_l": -190.91152954101562, "logps_train/policy_1_w": -88.28707885742188, "logps_train/policy_2_2": -80.55088806152344, "logps_train/policy_2_w": -137.09524536132812, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": -0.7920233011245728, "rewards_train/1-l": -2.536661148071289, "rewards_train/1-w": 2.254225730895996, "rewards_train/2-2": 1.6465224027633667, "rewards_train/2-w": -0.9134546518325806, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.790886878967285, "rewards_train/margins_1": 3.046249032020569, "rewards_train/margins_2": 2.5599770545959473, "step": 493 }, { "epoch": 1.48, "learning_rate": 8.867330977190877e-07, "loss": 0.5975, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -265.64190673828125, "logps_train/policy_1_l": -186.83038330078125, "logps_train/policy_1_w": -143.51324462890625, "logps_train/policy_2_2": -170.96929931640625, "logps_train/policy_2_w": -224.73211669921875, "logps_train/ref_1_2": -237.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -2.825910806655884, "rewards_train/1-l": -1.955596923828125, "rewards_train/1-w": 3.8330509662628174, "rewards_train/2-2": 3.8202571868896484, "rewards_train/2-w": -1.684931755065918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.788647890090942, "rewards_train/margins_1": 6.658961772918701, "rewards_train/margins_2": 5.505188941955566, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -253.59869384765625, "logps_train/policy_1_l": -181.1837158203125, "logps_train/policy_1_w": -127.1314697265625, "logps_train/policy_2_2": -176.9146270751953, "logps_train/policy_2_w": -204.5931396484375, "logps_train/ref_1_2": -238.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -212.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.5196354389190674, "rewards_train/1-l": -2.279797077178955, "rewards_train/1-w": 3.3411502838134766, "rewards_train/2-2": 3.530022621154785, "rewards_train/2-w": -1.690563678741455, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.620947360992432, "rewards_train/margins_1": 4.860785722732544, "rewards_train/margins_2": 5.22058629989624, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -182.13101196289062, "logps_train/policy_1_l": -219.25991821289062, "logps_train/policy_1_w": -176.8206787109375, "logps_train/policy_2_2": -115.02992248535156, "logps_train/policy_2_w": -282.98468017578125, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -217.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": -1.2435685396194458, "rewards_train/1-l": -2.9517736434936523, "rewards_train/1-w": 4.02652645111084, "rewards_train/2-2": 2.8522801399230957, "rewards_train/2-w": -3.2047200202941895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.978300094604492, "rewards_train/margins_1": 5.270094990730286, "rewards_train/margins_2": 6.057000160217285, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -245.6353759765625, "logps_train/policy_1_l": -201.35508728027344, "logps_train/policy_1_w": -135.252197265625, "logps_train/policy_2_2": -158.0540008544922, "logps_train/policy_2_w": -210.98062133789062, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -2.1645140647888184, "rewards_train/1-l": -2.765587329864502, "rewards_train/1-w": 3.259155750274658, "rewards_train/2-2": 3.7055368423461914, "rewards_train/2-w": -1.4613434076309204, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.02474308013916, "rewards_train/margins_1": 5.423669815063477, "rewards_train/margins_2": 5.166880249977112, "step": 494 }, { "epoch": 1.48, "logps_train/policy_1_2": -241.395751953125, "logps_train/policy_1_l": -190.3289031982422, "logps_train/policy_1_w": -167.5462646484375, "logps_train/policy_2_2": -141.05606079101562, "logps_train/policy_2_w": -270.35858154296875, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -240.0, "rewards_train/1-2": -2.3993396759033203, "rewards_train/1-l": -2.3544118404388428, "rewards_train/1-w": 4.091858863830566, "rewards_train/2-2": 3.8459572792053223, "rewards_train/2-w": -3.07023286819458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.446270704269409, "rewards_train/margins_1": 6.491198539733887, "rewards_train/margins_2": 6.916190147399902, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -223.2415313720703, "logps_train/policy_1_l": -305.26336669921875, "logps_train/policy_1_w": -168.3956756591797, "logps_train/policy_2_2": -141.65310668945312, "logps_train/policy_2_w": -247.936767578125, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -266.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": -1.4393882751464844, "rewards_train/1-l": -3.9874701499938965, "rewards_train/1-w": 3.6991043090820312, "rewards_train/2-2": 3.72961163520813, "rewards_train/2-w": -1.357351303100586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.686574459075928, "rewards_train/margins_1": 5.138492584228516, "rewards_train/margins_2": 5.086962938308716, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -205.28359985351562, "logps_train/policy_1_l": -136.36424255371094, "logps_train/policy_1_w": -125.24557495117188, "logps_train/policy_2_2": -143.77908325195312, "logps_train/policy_2_w": -186.70840454101562, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.2775774002075195, "rewards_train/1-l": -1.5836900472640991, "rewards_train/1-w": 3.1585474014282227, "rewards_train/2-2": 2.813596725463867, "rewards_train/2-w": -1.1462302207946777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.742237448692322, "rewards_train/margins_1": 4.436124801635742, "rewards_train/margins_2": 3.959826946258545, "step": 495 }, { "epoch": 1.48, "logps_train/policy_1_2": -196.74330139160156, "logps_train/policy_1_l": -152.4044189453125, "logps_train/policy_1_w": -100.6180419921875, "logps_train/policy_2_2": -106.71501159667969, "logps_train/policy_2_w": -190.91668701171875, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -2.8180809020996094, "rewards_train/1-l": -2.3869271278381348, "rewards_train/1-w": 3.33780574798584, "rewards_train/2-2": 3.222639799118042, "rewards_train/2-w": -2.5151069164276123, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.724732875823975, "rewards_train/margins_1": 6.155886650085449, "rewards_train/margins_2": 5.737746715545654, "step": 495 }, { "epoch": 1.49, "learning_rate": 8.679447045236964e-07, "loss": 0.3954, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -281.74462890625, "logps_train/policy_1_l": -165.99111938476562, "logps_train/policy_1_w": -149.71466064453125, "logps_train/policy_2_2": -192.8277587890625, "logps_train/policy_2_w": -224.55711364746094, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -233.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -1.9400858879089355, "rewards_train/1-l": -2.0551657676696777, "rewards_train/1-w": 3.7660326957702637, "rewards_train/2-2": 4.05003547668457, "rewards_train/2-w": -1.2432117462158203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.821198463439941, "rewards_train/margins_1": 5.706118583679199, "rewards_train/margins_2": 5.293247222900391, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -180.0683135986328, "logps_train/policy_1_l": -164.076416015625, "logps_train/policy_1_w": -106.86846923828125, "logps_train/policy_2_2": -128.5980987548828, "logps_train/policy_2_w": -168.11085510253906, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.0664018392562866, "rewards_train/1-l": -2.3866453170776367, "rewards_train/1-w": 2.944793701171875, "rewards_train/2-2": 2.589409351348877, "rewards_train/2-w": -1.1985859870910645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.331439018249512, "rewards_train/margins_1": 4.011195540428162, "rewards_train/margins_2": 3.7879953384399414, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -148.7445068359375, "logps_train/policy_1_l": -169.13290405273438, "logps_train/policy_1_w": -110.14372253417969, "logps_train/policy_2_2": -91.50505828857422, "logps_train/policy_2_w": -182.67236328125, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -1.2512081861495972, "rewards_train/1-l": -2.873836040496826, "rewards_train/1-w": 2.9726390838623047, "rewards_train/2-2": 2.3149240016937256, "rewards_train/2-w": -1.8333508968353271, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.846475124359131, "rewards_train/margins_1": 4.223847270011902, "rewards_train/margins_2": 4.148274898529053, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -148.89915466308594, "logps_train/policy_1_l": -169.76791381835938, "logps_train/policy_1_w": -126.82279968261719, "logps_train/policy_2_2": -101.0706558227539, "logps_train/policy_2_w": -186.7899169921875, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -0.943432092666626, "rewards_train/1-l": -2.324448347091675, "rewards_train/1-w": 2.903658390045166, "rewards_train/2-2": 1.9472315311431885, "rewards_train/2-w": -1.3218616247177124, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.228106737136841, "rewards_train/margins_1": 3.847090482711792, "rewards_train/margins_2": 3.269093155860901, "step": 496 }, { "epoch": 1.49, "logps_train/policy_1_2": -174.32875061035156, "logps_train/policy_1_l": -143.8782501220703, "logps_train/policy_1_w": -123.76302337646484, "logps_train/policy_2_2": -110.78411865234375, "logps_train/policy_2_w": -182.03323364257812, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.2432265281677246, "rewards_train/1-l": -1.2806963920593262, "rewards_train/1-w": 3.0744786262512207, "rewards_train/2-2": 2.98008394241333, "rewards_train/2-w": -1.0876984596252441, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.355175018310547, "rewards_train/margins_1": 4.317705154418945, "rewards_train/margins_2": 4.067782402038574, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -161.439697265625, "logps_train/policy_1_l": -201.77322387695312, "logps_train/policy_1_w": -134.72738647460938, "logps_train/policy_2_2": -104.77367401123047, "logps_train/policy_2_w": -215.232177734375, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -0.9677006602287292, "rewards_train/1-l": -2.280111789703369, "rewards_train/1-w": 3.538198471069336, "rewards_train/2-2": 2.713550567626953, "rewards_train/2-w": -1.5997815132141113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.818310260772705, "rewards_train/margins_1": 4.505899131298065, "rewards_train/margins_2": 4.3133320808410645, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -198.73818969726562, "logps_train/policy_1_l": -200.6949462890625, "logps_train/policy_1_w": -160.81715393066406, "logps_train/policy_2_2": -144.65347290039062, "logps_train/policy_2_w": -227.53297424316406, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": 0.046566955745220184, "rewards_train/1-l": -2.6920523643493652, "rewards_train/1-w": 3.30422306060791, "rewards_train/2-2": 3.4691009521484375, "rewards_train/2-w": -1.0665805339813232, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.996275424957275, "rewards_train/margins_1": 3.25765610486269, "rewards_train/margins_2": 4.535681486129761, "step": 497 }, { "epoch": 1.49, "logps_train/policy_1_2": -204.44296264648438, "logps_train/policy_1_l": -203.34164428710938, "logps_train/policy_1_w": -113.32713317871094, "logps_train/policy_2_2": -128.63059997558594, "logps_train/policy_2_w": -184.24514770507812, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -2.0021092891693115, "rewards_train/1-l": -1.9861173629760742, "rewards_train/1-w": 2.7901995182037354, "rewards_train/2-2": 2.8836190700531006, "rewards_train/2-w": -1.5807654857635498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.77631688117981, "rewards_train/margins_1": 4.792308807373047, "rewards_train/margins_2": 4.46438455581665, "step": 497 }, { "epoch": 1.49, "learning_rate": 8.493155970480074e-07, "loss": 0.5238, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -246.0445098876953, "logps_train/policy_1_l": -216.75082397460938, "logps_train/policy_1_w": -161.39266967773438, "logps_train/policy_2_2": -163.83248901367188, "logps_train/policy_2_w": -244.7977294921875, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -201.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.9978114366531372, "rewards_train/1-l": -2.9991066455841064, "rewards_train/1-w": 3.977139949798584, "rewards_train/2-2": 3.4308876991271973, "rewards_train/2-w": -1.3772342205047607, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.97624659538269, "rewards_train/margins_1": 5.974951386451721, "rewards_train/margins_2": 4.808121919631958, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -207.4300537109375, "logps_train/policy_1_l": -221.24981689453125, "logps_train/policy_1_w": -136.34519958496094, "logps_train/policy_2_2": -131.18487548828125, "logps_train/policy_2_w": -218.21961975097656, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.8792362213134766, "rewards_train/1-l": -2.9816226959228516, "rewards_train/1-w": 3.378176212310791, "rewards_train/2-2": 3.0754332542419434, "rewards_train/2-w": -2.105947256088257, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.359798908233643, "rewards_train/margins_1": 5.257412433624268, "rewards_train/margins_2": 5.1813805103302, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -176.096923828125, "logps_train/policy_1_l": -157.4296112060547, "logps_train/policy_1_w": -129.64663696289062, "logps_train/policy_2_2": -126.26830291748047, "logps_train/policy_2_w": -191.72488403320312, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.9862549304962158, "rewards_train/1-l": -2.519035816192627, "rewards_train/1-w": 3.8549647331237793, "rewards_train/2-2": 2.6579349040985107, "rewards_train/2-w": 0.02614492177963257, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 6.374000549316406, "rewards_train/margins_1": 4.841219663619995, "rewards_train/margins_2": 2.631789982318878, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -187.43902587890625, "logps_train/policy_1_l": -189.13160705566406, "logps_train/policy_1_w": -108.20469665527344, "logps_train/policy_2_2": -118.9317626953125, "logps_train/policy_2_w": -183.91571044921875, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -1.8401923179626465, "rewards_train/1-l": -2.7172627449035645, "rewards_train/1-w": 2.6679089069366455, "rewards_train/2-2": 2.589783191680908, "rewards_train/2-w": -1.9828797578811646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.38517165184021, "rewards_train/margins_1": 4.508101224899292, "rewards_train/margins_2": 4.572662949562073, "step": 498 }, { "epoch": 1.49, "logps_train/policy_1_2": -215.35330200195312, "logps_train/policy_1_l": -205.09121704101562, "logps_train/policy_1_w": -98.84159088134766, "logps_train/policy_2_2": -141.8566131591797, "logps_train/policy_2_w": -159.24371337890625, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -1.380643367767334, "rewards_train/1-l": -3.398183822631836, "rewards_train/1-w": 2.8549036979675293, "rewards_train/2-2": 3.548713207244873, "rewards_train/2-w": -0.8040592670440674, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.253087520599365, "rewards_train/margins_1": 4.235547065734863, "rewards_train/margins_2": 4.35277247428894, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -200.0404052734375, "logps_train/policy_1_l": -162.03599548339844, "logps_train/policy_1_w": -135.28643798828125, "logps_train/policy_2_2": -125.42893981933594, "logps_train/policy_2_w": -199.55311584472656, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.92083740234375, "rewards_train/1-l": -1.9346050024032593, "rewards_train/1-w": 2.9932308197021484, "rewards_train/2-2": 2.9531991481781006, "rewards_train/2-w": -1.3849990367889404, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.927835822105408, "rewards_train/margins_1": 4.914068222045898, "rewards_train/margins_2": 4.338198184967041, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -206.10195922851562, "logps_train/policy_1_l": -220.00685119628906, "logps_train/policy_1_w": -134.2451171875, "logps_train/policy_2_2": -137.8673858642578, "logps_train/policy_2_w": -199.63677978515625, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -1.0812883377075195, "rewards_train/1-l": -2.578810691833496, "rewards_train/1-w": 3.529395580291748, "rewards_train/2-2": 3.697636365890503, "rewards_train/2-w": -0.8394597768783569, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.108206272125244, "rewards_train/margins_1": 4.610683917999268, "rewards_train/margins_2": 4.53709614276886, "step": 499 }, { "epoch": 1.49, "logps_train/policy_1_2": -206.80908203125, "logps_train/policy_1_l": -165.75563049316406, "logps_train/policy_1_w": -119.68621826171875, "logps_train/policy_2_2": -144.66087341308594, "logps_train/policy_2_w": -176.6619415283203, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.065284252166748, "rewards_train/1-l": -1.4999761581420898, "rewards_train/1-w": 3.6532535552978516, "rewards_train/2-2": 3.2542245388031006, "rewards_train/2-w": 0.024431690573692322, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.153229713439941, "rewards_train/margins_1": 4.7185378074646, "rewards_train/margins_2": 3.2297928482294083, "step": 499 }, { "epoch": 1.5, "learning_rate": 8.30847593460069e-07, "loss": 0.5207, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -153.5037841796875, "logps_train/policy_1_l": -198.56930541992188, "logps_train/policy_1_w": -143.40484619140625, "logps_train/policy_2_2": -94.28221130371094, "logps_train/policy_2_w": -224.72015380859375, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.066784381866455, "rewards_train/1-l": -2.3577120304107666, "rewards_train/1-w": 3.4556097984313965, "rewards_train/2-2": 2.579005479812622, "rewards_train/2-w": -1.854438066482544, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.813321828842163, "rewards_train/margins_1": 4.522394180297852, "rewards_train/margins_2": 4.433443546295166, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -173.297607421875, "logps_train/policy_1_l": -103.56140899658203, "logps_train/policy_1_w": -109.53363800048828, "logps_train/policy_2_2": -105.22166442871094, "logps_train/policy_2_w": -185.3824920654297, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -2.3364009857177734, "rewards_train/1-l": -1.3720831871032715, "rewards_train/1-w": 2.7204642295837402, "rewards_train/2-2": 2.2413108348846436, "rewards_train/2-w": -1.995182991027832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.092547416687012, "rewards_train/margins_1": 5.056865215301514, "rewards_train/margins_2": 4.236493825912476, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -218.64984130859375, "logps_train/policy_1_l": -187.84695434570312, "logps_train/policy_1_w": -111.34353637695312, "logps_train/policy_2_2": -147.4604034423828, "logps_train/policy_2_w": -174.3992919921875, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -1.6018991470336914, "rewards_train/1-l": -2.5952181816101074, "rewards_train/1-w": 2.9925994873046875, "rewards_train/2-2": 2.7713427543640137, "rewards_train/2-w": -1.052770972251892, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.587817668914795, "rewards_train/margins_1": 4.594498634338379, "rewards_train/margins_2": 3.8241137266159058, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -156.90924072265625, "logps_train/policy_1_l": -153.2631072998047, "logps_train/policy_1_w": -122.21428680419922, "logps_train/policy_2_2": -105.5926513671875, "logps_train/policy_2_w": -182.86041259765625, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.9109445810317993, "rewards_train/1-l": -1.6995038986206055, "rewards_train/1-w": 3.1708555221557617, "rewards_train/2-2": 2.5143184661865234, "rewards_train/2-w": -0.9731495380401611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.870359420776367, "rewards_train/margins_1": 4.081800103187561, "rewards_train/margins_2": 3.4874680042266846, "step": 500 }, { "epoch": 1.5, "logps_train/policy_1_2": -157.27188110351562, "logps_train/policy_1_l": -143.1219482421875, "logps_train/policy_1_w": -96.6134033203125, "logps_train/policy_2_2": -98.96949768066406, "logps_train/policy_2_w": -153.6960906982422, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.3410061597824097, "rewards_train/1-l": -2.2642455101013184, "rewards_train/1-w": 2.2941274642944336, "rewards_train/2-2": 2.442308187484741, "rewards_train/2-w": -1.6380665302276611, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.558372974395752, "rewards_train/margins_1": 3.6351336240768433, "rewards_train/margins_2": 4.080374717712402, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -140.90293884277344, "logps_train/policy_1_l": -129.14996337890625, "logps_train/policy_1_w": -101.42082977294922, "logps_train/policy_2_2": -88.05174255371094, "logps_train/policy_2_w": -175.3877410888672, "logps_train/ref_1_2": -127.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -111.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.391635775566101, "rewards_train/1-l": -1.7988345623016357, "rewards_train/1-w": 3.050299882888794, "rewards_train/2-2": 2.2967300415039062, "rewards_train/2-w": -1.7161173820495605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.84913444519043, "rewards_train/margins_1": 4.441935658454895, "rewards_train/margins_2": 4.012847423553467, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -222.92593383789062, "logps_train/policy_1_l": -227.78900146484375, "logps_train/policy_1_w": -159.91757202148438, "logps_train/policy_2_2": -144.5371856689453, "logps_train/policy_2_w": -244.306884765625, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -1.7941553592681885, "rewards_train/1-l": -1.9064382314682007, "rewards_train/1-w": 3.821915626525879, "rewards_train/2-2": 3.110344171524048, "rewards_train/2-w": -1.4963126182556152, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.72835385799408, "rewards_train/margins_1": 5.616070985794067, "rewards_train/margins_2": 4.606656789779663, "step": 501 }, { "epoch": 1.5, "logps_train/policy_1_2": -169.62347412109375, "logps_train/policy_1_l": -155.41229248046875, "logps_train/policy_1_w": -95.47335815429688, "logps_train/policy_2_2": -106.90422058105469, "logps_train/policy_2_w": -146.52410888671875, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -1.5967226028442383, "rewards_train/1-l": -2.405486583709717, "rewards_train/1-w": 2.470633029937744, "rewards_train/2-2": 2.617780923843384, "rewards_train/2-w": -0.9266307353973389, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.876119613647461, "rewards_train/margins_1": 4.067355632781982, "rewards_train/margins_2": 3.5444116592407227, "step": 501 }, { "epoch": 1.5, "learning_rate": 8.125424962044742e-07, "loss": 0.6464, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -175.14471435546875, "logps_train/policy_1_l": -196.33450317382812, "logps_train/policy_1_w": -137.56314086914062, "logps_train/policy_2_2": -125.64738464355469, "logps_train/policy_2_w": -202.359619140625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.0787291526794434, "rewards_train/1-l": -2.452199935913086, "rewards_train/1-w": 3.440561532974243, "rewards_train/2-2": 2.1052803993225098, "rewards_train/2-w": -0.9043210744857788, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.892761468887329, "rewards_train/margins_1": 4.5192906856536865, "rewards_train/margins_2": 3.0096014738082886, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -225.73658752441406, "logps_train/policy_1_l": -179.17361450195312, "logps_train/policy_1_w": -133.14907836914062, "logps_train/policy_2_2": -152.63502502441406, "logps_train/policy_2_w": -204.81246948242188, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.0228767395019531, "rewards_train/1-l": -2.466677188873291, "rewards_train/1-w": 3.711068630218506, "rewards_train/2-2": 3.841184139251709, "rewards_train/2-w": -1.1636693477630615, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.177745819091797, "rewards_train/margins_1": 4.733945369720459, "rewards_train/margins_2": 5.0048534870147705, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -160.5211181640625, "logps_train/policy_1_l": -177.1157989501953, "logps_train/policy_1_w": -113.80445861816406, "logps_train/policy_2_2": -101.4864501953125, "logps_train/policy_2_w": -179.4267578125, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.2431273460388184, "rewards_train/1-l": -2.468317985534668, "rewards_train/1-w": 2.464329481124878, "rewards_train/2-2": 2.6704959869384766, "rewards_train/2-w": -1.166406512260437, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.932647466659546, "rewards_train/margins_1": 3.7074568271636963, "rewards_train/margins_2": 3.8369024991989136, "step": 502 }, { "epoch": 1.5, "logps_train/policy_1_2": -102.23177337646484, "logps_train/policy_1_l": -118.99409484863281, "logps_train/policy_1_w": -56.254913330078125, "logps_train/policy_2_2": -54.143211364746094, "logps_train/policy_2_w": -115.03413391113281, "logps_train/ref_1_2": -85.5, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -74.0, "logps_train/ref_2_2": -72.0, "logps_train/ref_2_w": -93.0, "rewards_train/1-2": -1.687893033027649, "rewards_train/1-l": -2.1627326011657715, "rewards_train/1-w": 1.7634308338165283, "rewards_train/2-2": 1.7815312147140503, "rewards_train/2-w": -2.204288959503174, "rewards_train/accuracies": 0.8125, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9261634349823, "rewards_train/margins_1": 3.4513238668441772, "rewards_train/margins_2": 3.985820174217224, "step": 502 }, { "epoch": 1.51, "logps_train/policy_1_2": -198.8677215576172, "logps_train/policy_1_l": -155.75997924804688, "logps_train/policy_1_w": -139.56658935546875, "logps_train/policy_2_2": -126.12783813476562, "logps_train/policy_2_w": -222.7038116455078, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -2.0117716789245605, "rewards_train/1-l": -1.9644262790679932, "rewards_train/1-w": 3.256816864013672, "rewards_train/2-2": 2.759481906890869, "rewards_train/2-w": -2.260615348815918, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.221243143081665, "rewards_train/margins_1": 5.268588542938232, "rewards_train/margins_2": 5.020097255706787, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -159.36212158203125, "logps_train/policy_1_l": -189.2141571044922, "logps_train/policy_1_w": -134.92532348632812, "logps_train/policy_2_2": -107.87234497070312, "logps_train/policy_2_w": -189.38436889648438, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.7155095338821411, "rewards_train/1-l": -2.266923666000366, "rewards_train/1-w": 2.863912582397461, "rewards_train/2-2": 2.7743868827819824, "rewards_train/2-w": -1.0149993896484375, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.130836248397827, "rewards_train/margins_1": 3.579422116279602, "rewards_train/margins_2": 3.78938627243042, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -105.73028564453125, "logps_train/policy_1_l": -100.67681121826172, "logps_train/policy_1_w": -81.80365753173828, "logps_train/policy_2_2": -58.96955871582031, "logps_train/policy_2_w": -136.59237670898438, "logps_train/ref_1_2": -95.0, "logps_train/ref_1_l": -86.0, "logps_train/ref_1_w": -104.0, "logps_train/ref_2_2": -79.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -1.10408353805542, "rewards_train/1-l": -1.4630789756774902, "rewards_train/1-w": 2.2086968421936035, "rewards_train/2-2": 2.027653455734253, "rewards_train/2-w": -1.0635355710983276, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.6717758178710938, "rewards_train/margins_1": 3.3127803802490234, "rewards_train/margins_2": 3.0911890268325806, "step": 503 }, { "epoch": 1.51, "logps_train/policy_1_2": -223.87921142578125, "logps_train/policy_1_l": -202.22756958007812, "logps_train/policy_1_w": -146.55734252929688, "logps_train/policy_2_2": -149.11697387695312, "logps_train/policy_2_w": -222.22799682617188, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.2285462617874146, "rewards_train/1-l": -2.5534212589263916, "rewards_train/1-w": 3.7145776748657227, "rewards_train/2-2": 3.628927707672119, "rewards_train/2-w": -1.2571744918823242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.267998933792114, "rewards_train/margins_1": 4.943123936653137, "rewards_train/margins_2": 4.886102199554443, "step": 503 }, { "epoch": 1.51, "learning_rate": 7.944020918264458e-07, "loss": 0.5676, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -178.58636474609375, "logps_train/policy_1_l": -180.0044403076172, "logps_train/policy_1_w": -135.02816772460938, "logps_train/policy_2_2": -122.31297302246094, "logps_train/policy_2_w": -204.29605102539062, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.6789477467536926, "rewards_train/1-l": -2.2297158241271973, "rewards_train/1-w": 3.498159170150757, "rewards_train/2-2": 3.2876477241516113, "rewards_train/2-w": -1.073746681213379, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.727874994277954, "rewards_train/margins_1": 4.1771069169044495, "rewards_train/margins_2": 4.36139440536499, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -192.58631896972656, "logps_train/policy_1_l": -175.35398864746094, "logps_train/policy_1_w": -137.75262451171875, "logps_train/policy_2_2": -119.79707336425781, "logps_train/policy_2_w": -224.404541015625, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.8019909858703613, "rewards_train/1-l": -2.347703695297241, "rewards_train/1-w": 3.4968082904815674, "rewards_train/2-2": 3.1464638710021973, "rewards_train/2-w": -2.2802977561950684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.844511985778809, "rewards_train/margins_1": 5.298799276351929, "rewards_train/margins_2": 5.426761627197266, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -121.97712707519531, "logps_train/policy_1_l": -106.21464538574219, "logps_train/policy_1_w": -73.12498474121094, "logps_train/policy_2_2": -79.79478454589844, "logps_train/policy_2_w": -108.85633087158203, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -89.0, "logps_train/ref_1_w": -98.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -108.5, "rewards_train/1-2": -0.9266189932823181, "rewards_train/1-l": -1.6910932064056396, "rewards_train/1-w": 2.463087558746338, "rewards_train/2-2": 1.9857077598571777, "rewards_train/2-w": -0.032312341034412384, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.1541807651519775, "rewards_train/margins_1": 3.389706552028656, "rewards_train/margins_2": 2.01802010089159, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -168.48902893066406, "logps_train/policy_1_l": -118.638427734375, "logps_train/policy_1_w": -155.96087646484375, "logps_train/policy_2_2": -103.39421081542969, "logps_train/policy_2_w": -245.74398803710938, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -0.9461687207221985, "rewards_train/1-l": -1.4934322834014893, "rewards_train/1-w": 3.680866241455078, "rewards_train/2-2": 2.9551100730895996, "rewards_train/2-w": -1.9868985414505005, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.174298524856567, "rewards_train/margins_1": 4.627034962177277, "rewards_train/margins_2": 4.9420086145401, "step": 504 }, { "epoch": 1.51, "logps_train/policy_1_2": -155.5869140625, "logps_train/policy_1_l": -163.22659301757812, "logps_train/policy_1_w": -85.5084228515625, "logps_train/policy_2_2": -101.08589935302734, "logps_train/policy_2_w": -149.13238525390625, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -1.1888680458068848, "rewards_train/1-l": -1.7490277290344238, "rewards_train/1-w": 2.897204875946045, "rewards_train/2-2": 2.5011754035949707, "rewards_train/2-w": -1.3933178186416626, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.646232604980469, "rewards_train/margins_1": 4.08607292175293, "rewards_train/margins_2": 3.8944932222366333, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -157.27523803710938, "logps_train/policy_1_l": -157.83016967773438, "logps_train/policy_1_w": -127.1297378540039, "logps_train/policy_2_2": -97.48908996582031, "logps_train/policy_2_w": -204.00502014160156, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.119565486907959, "rewards_train/1-l": -2.1060144901275635, "rewards_train/1-w": 3.127455234527588, "rewards_train/2-2": 2.7200846672058105, "rewards_train/2-w": -1.355776071548462, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.233469724655151, "rewards_train/margins_1": 4.247020721435547, "rewards_train/margins_2": 4.0758607387542725, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -245.62022399902344, "logps_train/policy_1_l": -223.04840087890625, "logps_train/policy_1_w": -187.17617797851562, "logps_train/policy_2_2": -157.931396484375, "logps_train/policy_2_w": -275.69219970703125, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -227.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -258.0, "rewards_train/1-2": -2.2405381202697754, "rewards_train/1-l": -2.91013765335083, "rewards_train/1-w": 3.9794278144836426, "rewards_train/2-2": 3.651782989501953, "rewards_train/2-w": -1.8107731342315674, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.889565467834473, "rewards_train/margins_1": 6.219965934753418, "rewards_train/margins_2": 5.4625561237335205, "step": 505 }, { "epoch": 1.51, "logps_train/policy_1_2": -163.77197265625, "logps_train/policy_1_l": -155.19131469726562, "logps_train/policy_1_w": -125.51789093017578, "logps_train/policy_2_2": -106.27377319335938, "logps_train/policy_2_w": -200.2610321044922, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.5463374257087708, "rewards_train/1-l": -2.4603424072265625, "rewards_train/1-w": 2.57047700881958, "rewards_train/2-2": 3.4864895343780518, "rewards_train/2-w": -2.1405558586120605, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.030819416046143, "rewards_train/margins_1": 3.116814434528351, "rewards_train/margins_2": 5.627045392990112, "step": 505 }, { "epoch": 1.51, "learning_rate": 7.764281507974711e-07, "loss": 0.5994, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -141.37571716308594, "logps_train/policy_1_l": -148.8104248046875, "logps_train/policy_1_w": -105.44551086425781, "logps_train/policy_2_2": -98.71310424804688, "logps_train/policy_2_w": -173.35736083984375, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.6449936628341675, "rewards_train/1-l": -2.346984386444092, "rewards_train/1-w": 2.8202924728393555, "rewards_train/2-2": 2.395291328430176, "rewards_train/2-w": -1.3591742515563965, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.167276859283447, "rewards_train/margins_1": 3.465286135673523, "rewards_train/margins_2": 3.7544655799865723, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -211.55331420898438, "logps_train/policy_1_l": -177.67941284179688, "logps_train/policy_1_w": -99.71412658691406, "logps_train/policy_2_2": -132.19317626953125, "logps_train/policy_2_w": -172.4817657470703, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.6533782482147217, "rewards_train/1-l": -2.329270124435425, "rewards_train/1-w": 3.3139142990112305, "rewards_train/2-2": 3.4525575637817383, "rewards_train/2-w": -1.5292317867279053, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.643184423446655, "rewards_train/margins_1": 4.967292547225952, "rewards_train/margins_2": 4.9817893505096436, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -197.55636596679688, "logps_train/policy_1_l": -137.37867736816406, "logps_train/policy_1_w": -97.73405456542969, "logps_train/policy_2_2": -122.32475280761719, "logps_train/policy_2_w": -153.42906188964844, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -115.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.9644739627838135, "rewards_train/1-l": -2.2128193378448486, "rewards_train/1-w": 2.6496407985687256, "rewards_train/2-2": 3.292182683944702, "rewards_train/2-w": -1.0261096954345703, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.862460136413574, "rewards_train/margins_1": 4.614114761352539, "rewards_train/margins_2": 4.3182923793792725, "step": 506 }, { "epoch": 1.51, "logps_train/policy_1_2": -196.60824584960938, "logps_train/policy_1_l": -167.7921142578125, "logps_train/policy_1_w": -86.57886505126953, "logps_train/policy_2_2": -118.97265625, "logps_train/policy_2_w": -153.64144897460938, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -2.428013324737549, "rewards_train/1-l": -2.6793088912963867, "rewards_train/1-w": 2.6366450786590576, "rewards_train/2-2": 2.7527339458465576, "rewards_train/2-w": -1.6223481893539429, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.315953969955444, "rewards_train/margins_1": 5.0646584033966064, "rewards_train/margins_2": 4.3750821352005005, "step": 506 }, { "epoch": 1.52, "logps_train/policy_1_2": -228.4810333251953, "logps_train/policy_1_l": -217.8291015625, "logps_train/policy_1_w": -133.4031982421875, "logps_train/policy_2_2": -150.77389526367188, "logps_train/policy_2_w": -198.48008728027344, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -185.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.3807196617126465, "rewards_train/1-l": -2.2559573650360107, "rewards_train/1-w": 3.74678897857666, "rewards_train/2-2": 3.4378442764282227, "rewards_train/2-w": -0.3948839604854584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.002746343612671, "rewards_train/margins_1": 5.127508640289307, "rewards_train/margins_2": 3.832728236913681, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -164.22601318359375, "logps_train/policy_1_l": -200.406982421875, "logps_train/policy_1_w": -159.13824462890625, "logps_train/policy_2_2": -107.72113037109375, "logps_train/policy_2_w": -251.20639038085938, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -201.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -0.6835393905639648, "rewards_train/1-l": -2.404076337814331, "rewards_train/1-w": 4.213325023651123, "rewards_train/2-2": 2.776324987411499, "rewards_train/2-w": -2.0838732719421387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.617401361465454, "rewards_train/margins_1": 4.896864414215088, "rewards_train/margins_2": 4.860198259353638, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -153.75254821777344, "logps_train/policy_1_l": -136.57781982421875, "logps_train/policy_1_w": -84.85777282714844, "logps_train/policy_2_2": -100.77912139892578, "logps_train/policy_2_w": -128.2515106201172, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -124.5, "rewards_train/1-2": -0.7990828156471252, "rewards_train/1-l": -2.398846387863159, "rewards_train/1-w": 2.403773307800293, "rewards_train/2-2": 2.5175952911376953, "rewards_train/2-w": -0.3711468577384949, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.802619695663452, "rewards_train/margins_1": 3.202856123447418, "rewards_train/margins_2": 2.88874214887619, "step": 507 }, { "epoch": 1.52, "logps_train/policy_1_2": -220.39654541015625, "logps_train/policy_1_l": -195.91976928710938, "logps_train/policy_1_w": -152.93701171875, "logps_train/policy_2_2": -156.2884979248047, "logps_train/policy_2_w": -211.9873046875, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.807624340057373, "rewards_train/1-l": -2.603499412536621, "rewards_train/1-w": 3.7238783836364746, "rewards_train/2-2": 3.198298692703247, "rewards_train/2-w": -0.04306647181510925, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.327377796173096, "rewards_train/margins_1": 4.531502723693848, "rewards_train/margins_2": 3.2413651645183563, "step": 507 }, { "epoch": 1.52, "learning_rate": 7.586224273425083e-07, "loss": 0.5887, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -192.78054809570312, "logps_train/policy_1_l": -149.9927978515625, "logps_train/policy_1_w": -93.58294677734375, "logps_train/policy_2_2": -126.48262786865234, "logps_train/policy_2_w": -164.81015014648438, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -1.4620399475097656, "rewards_train/1-l": -2.533264636993408, "rewards_train/1-w": 3.09199857711792, "rewards_train/2-2": 3.134158134460449, "rewards_train/2-w": -1.0237892866134644, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.625263214111328, "rewards_train/margins_1": 4.5540385246276855, "rewards_train/margins_2": 4.157947421073914, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -154.15745544433594, "logps_train/policy_1_l": -156.82891845703125, "logps_train/policy_1_w": -104.17041015625, "logps_train/policy_2_2": -98.2718276977539, "logps_train/policy_2_w": -158.57611083984375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -1.3757069110870361, "rewards_train/1-l": -2.4343574047088623, "rewards_train/1-w": 2.6856942176818848, "rewards_train/2-2": 2.545473575592041, "rewards_train/2-w": -0.746574878692627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.120051622390747, "rewards_train/margins_1": 4.061401128768921, "rewards_train/margins_2": 3.292048454284668, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -169.02847290039062, "logps_train/policy_1_l": -128.41497802734375, "logps_train/policy_1_w": -109.69772338867188, "logps_train/policy_2_2": -109.1385498046875, "logps_train/policy_2_w": -181.64108276367188, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -105.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.2462074756622314, "rewards_train/1-l": -2.338568687438965, "rewards_train/1-w": 3.584134101867676, "rewards_train/2-2": 2.542785406112671, "rewards_train/2-w": -1.1207480430603027, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.922702789306641, "rewards_train/margins_1": 4.830341577529907, "rewards_train/margins_2": 3.6635334491729736, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -205.31149291992188, "logps_train/policy_1_l": -205.90887451171875, "logps_train/policy_1_w": -160.10128784179688, "logps_train/policy_2_2": -132.57168579101562, "logps_train/policy_2_w": -251.94912719726562, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": -1.9530243873596191, "rewards_train/1-l": -2.5175962448120117, "rewards_train/1-w": 3.35998797416687, "rewards_train/2-2": 3.078378677368164, "rewards_train/2-w": -2.4242095947265625, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.877584218978882, "rewards_train/margins_1": 5.313012361526489, "rewards_train/margins_2": 5.502588272094727, "step": 508 }, { "epoch": 1.52, "logps_train/policy_1_2": -182.83888244628906, "logps_train/policy_1_l": -186.46875, "logps_train/policy_1_w": -109.08265686035156, "logps_train/policy_2_2": -109.96415710449219, "logps_train/policy_2_w": -188.45245361328125, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.7684588432312012, "rewards_train/1-l": -2.4875009059906006, "rewards_train/1-w": 3.4424850940704346, "rewards_train/2-2": 3.303389072418213, "rewards_train/2-w": -1.9800114631652832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.929986000061035, "rewards_train/margins_1": 5.210943937301636, "rewards_train/margins_2": 5.283400535583496, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -178.9961395263672, "logps_train/policy_1_l": -203.34979248046875, "logps_train/policy_1_w": -114.71658325195312, "logps_train/policy_2_2": -118.28935241699219, "logps_train/policy_2_w": -186.71719360351562, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.5178756713867188, "rewards_train/1-l": -2.7618350982666016, "rewards_train/1-w": 2.7218964099884033, "rewards_train/2-2": 2.510493516921997, "rewards_train/2-w": -2.110978364944458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.483731508255005, "rewards_train/margins_1": 4.239772081375122, "rewards_train/margins_2": 4.621471881866455, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -147.0889434814453, "logps_train/policy_1_l": -126.14069366455078, "logps_train/policy_1_w": -110.2645034790039, "logps_train/policy_2_2": -89.51402282714844, "logps_train/policy_2_w": -188.3894805908203, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.3436596393585205, "rewards_train/1-l": -2.578327178955078, "rewards_train/1-w": 3.1872215270996094, "rewards_train/2-2": 2.501918315887451, "rewards_train/2-w": -1.8998867273330688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.7655487060546875, "rewards_train/margins_1": 4.53088116645813, "rewards_train/margins_2": 4.40180504322052, "step": 509 }, { "epoch": 1.52, "logps_train/policy_1_2": -171.60528564453125, "logps_train/policy_1_l": -223.9129638671875, "logps_train/policy_1_w": -162.226806640625, "logps_train/policy_2_2": -118.03019714355469, "logps_train/policy_2_w": -228.8123779296875, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -0.07967031002044678, "rewards_train/1-l": -2.9920766353607178, "rewards_train/1-w": 3.3960700035095215, "rewards_train/2-2": 3.555769920349121, "rewards_train/2-w": -1.2613167762756348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.388146638870239, "rewards_train/margins_1": 3.4757403135299683, "rewards_train/margins_2": 4.817086696624756, "step": 509 }, { "epoch": 1.53, "learning_rate": 7.409866592687768e-07, "loss": 0.5181, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -242.00616455078125, "logps_train/policy_1_l": -175.9071044921875, "logps_train/policy_1_w": -128.56887817382812, "logps_train/policy_2_2": -165.71917724609375, "logps_train/policy_2_w": -197.16851806640625, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.432257056236267, "rewards_train/1-l": -1.7684462070465088, "rewards_train/1-w": 3.5036585330963135, "rewards_train/2-2": 3.4319891929626465, "rewards_train/2-w": -1.0418518781661987, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.272104740142822, "rewards_train/margins_1": 4.935915589332581, "rewards_train/margins_2": 4.473841071128845, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -210.09902954101562, "logps_train/policy_1_l": -237.04238891601562, "logps_train/policy_1_w": -144.64340209960938, "logps_train/policy_2_2": -130.608154296875, "logps_train/policy_2_w": -228.75592041015625, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -2.261075973510742, "rewards_train/1-l": -3.3508694171905518, "rewards_train/1-w": 3.383559226989746, "rewards_train/2-2": 3.110668659210205, "rewards_train/2-w": -2.351569414138794, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.734428644180298, "rewards_train/margins_1": 5.644635200500488, "rewards_train/margins_2": 5.462238073348999, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -200.89541625976562, "logps_train/policy_1_l": -177.42352294921875, "logps_train/policy_1_w": -113.54653930664062, "logps_train/policy_2_2": -125.18048095703125, "logps_train/policy_2_w": -178.9439697265625, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -1.859073519706726, "rewards_train/1-l": -2.4563183784484863, "rewards_train/1-w": 2.751645565032959, "rewards_train/2-2": 3.1258978843688965, "rewards_train/2-w": -1.535510778427124, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.207963943481445, "rewards_train/margins_1": 4.610719084739685, "rewards_train/margins_2": 4.6614086627960205, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -196.49435424804688, "logps_train/policy_1_l": -182.66964721679688, "logps_train/policy_1_w": -141.50840759277344, "logps_train/policy_2_2": -114.92692565917969, "logps_train/policy_2_w": -226.54345703125, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -1.8259979486465454, "rewards_train/1-l": -2.3248744010925293, "rewards_train/1-w": 3.5390028953552246, "rewards_train/2-2": 3.1198079586029053, "rewards_train/2-w": -1.9590330123901367, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.863877296447754, "rewards_train/margins_1": 5.36500084400177, "rewards_train/margins_2": 5.078840970993042, "step": 510 }, { "epoch": 1.53, "logps_train/policy_1_2": -183.9020538330078, "logps_train/policy_1_l": -179.21119689941406, "logps_train/policy_1_w": -122.47525787353516, "logps_train/policy_2_2": -123.37018585205078, "logps_train/policy_2_w": -192.00064086914062, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -0.8331732749938965, "rewards_train/1-l": -1.917212724685669, "rewards_train/1-w": 3.1790361404418945, "rewards_train/2-2": 3.092669725418091, "rewards_train/2-w": -0.8744772672653198, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.0962488651275635, "rewards_train/margins_1": 4.012209415435791, "rewards_train/margins_2": 3.9671469926834106, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -155.54144287109375, "logps_train/policy_1_l": -184.08636474609375, "logps_train/policy_1_w": -109.69120788574219, "logps_train/policy_2_2": -106.59790802001953, "logps_train/policy_2_w": -164.2313690185547, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -0.743792712688446, "rewards_train/1-l": -2.394573211669922, "rewards_train/1-w": 2.710273504257202, "rewards_train/2-2": 2.559154748916626, "rewards_train/2-w": -1.2311441898345947, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.104846715927124, "rewards_train/margins_1": 3.454066216945648, "rewards_train/margins_2": 3.7902989387512207, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -177.9116973876953, "logps_train/policy_1_l": -143.7704620361328, "logps_train/policy_1_w": -94.7459487915039, "logps_train/policy_2_2": -110.10752868652344, "logps_train/policy_2_w": -163.42489624023438, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -1.4396069049835205, "rewards_train/1-l": -1.4228850603103638, "rewards_train/1-w": 2.9664454460144043, "rewards_train/2-2": 2.9269914627075195, "rewards_train/2-w": -1.252353310585022, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.389330506324768, "rewards_train/margins_1": 4.406052350997925, "rewards_train/margins_2": 4.1793447732925415, "step": 511 }, { "epoch": 1.53, "logps_train/policy_1_2": -176.80755615234375, "logps_train/policy_1_l": -149.57000732421875, "logps_train/policy_1_w": -120.26290893554688, "logps_train/policy_2_2": -119.36117553710938, "logps_train/policy_2_w": -195.03179931640625, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.2479444742202759, "rewards_train/1-l": -2.005828857421875, "rewards_train/1-w": 2.9502716064453125, "rewards_train/2-2": 2.4599766731262207, "rewards_train/2-w": -1.8760297298431396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9561004638671875, "rewards_train/margins_1": 4.198216080665588, "rewards_train/margins_2": 4.33600640296936, "step": 511 }, { "epoch": 1.53, "learning_rate": 7.235225677961513e-07, "loss": 0.5168, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -178.00729370117188, "logps_train/policy_1_l": -168.4149627685547, "logps_train/policy_1_w": -109.71092224121094, "logps_train/policy_2_2": -116.69906616210938, "logps_train/policy_2_w": -170.41403198242188, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.3452613353729248, "rewards_train/1-l": -1.8459391593933105, "rewards_train/1-w": 3.029054641723633, "rewards_train/2-2": 2.7096340656280518, "rewards_train/2-w": -0.876632571220398, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.874993801116943, "rewards_train/margins_1": 4.374315977096558, "rewards_train/margins_2": 3.5862666368484497, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -182.02496337890625, "logps_train/policy_1_l": -197.50128173828125, "logps_train/policy_1_w": -147.96438598632812, "logps_train/policy_2_2": -121.84909057617188, "logps_train/policy_2_w": -251.16729736328125, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -1.02280855178833, "rewards_train/1-l": -2.339190721511841, "rewards_train/1-w": 4.120358943939209, "rewards_train/2-2": 2.684622287750244, "rewards_train/2-w": -2.1729795932769775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.45954966545105, "rewards_train/margins_1": 5.143167495727539, "rewards_train/margins_2": 4.857601881027222, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -261.0372619628906, "logps_train/policy_1_l": -223.5365447998047, "logps_train/policy_1_w": -120.6550064086914, "logps_train/policy_2_2": -163.02279663085938, "logps_train/policy_2_w": -208.27711486816406, "logps_train/ref_1_2": -239.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -2.2899081707000732, "rewards_train/1-l": -2.692375898361206, "rewards_train/1-w": 3.517897844314575, "rewards_train/2-2": 3.9579734802246094, "rewards_train/2-w": -1.9577898979187012, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.210273742675781, "rewards_train/margins_1": 5.807806015014648, "rewards_train/margins_2": 5.9157633781433105, "step": 512 }, { "epoch": 1.53, "logps_train/policy_1_2": -153.64634704589844, "logps_train/policy_1_l": -124.8259048461914, "logps_train/policy_1_w": -67.06251525878906, "logps_train/policy_2_2": -93.85272216796875, "logps_train/policy_2_w": -117.72066497802734, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -109.0, "rewards_train/1-2": -1.302037239074707, "rewards_train/1-l": -2.1450905799865723, "rewards_train/1-w": 2.468163013458252, "rewards_train/2-2": 2.783673048019409, "rewards_train/2-w": -0.8652306199073792, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.613253593444824, "rewards_train/margins_1": 3.770200252532959, "rewards_train/margins_2": 3.6489036679267883, "step": 512 }, { "epoch": 1.54, "logps_train/policy_1_2": -148.09246826171875, "logps_train/policy_1_l": -166.23509216308594, "logps_train/policy_1_w": -118.2115707397461, "logps_train/policy_2_2": -98.46342468261719, "logps_train/policy_2_w": -178.41273498535156, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.45221495628356934, "rewards_train/1-l": -2.460716724395752, "rewards_train/1-w": 3.2077488899230957, "rewards_train/2-2": 2.8091256618499756, "rewards_train/2-w": -0.590491771697998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.668465614318848, "rewards_train/margins_1": 3.659963846206665, "rewards_train/margins_2": 3.3996174335479736, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -161.33192443847656, "logps_train/policy_1_l": -143.9378662109375, "logps_train/policy_1_w": -114.6861572265625, "logps_train/policy_2_2": -111.68107604980469, "logps_train/policy_2_w": -178.00985717773438, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.9047746658325195, "rewards_train/1-l": -2.095447063446045, "rewards_train/1-w": 2.8557989597320557, "rewards_train/2-2": 2.5469794273376465, "rewards_train/2-w": -1.3513760566711426, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.951246023178101, "rewards_train/margins_1": 3.760573625564575, "rewards_train/margins_2": 3.898355484008789, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -192.48776245117188, "logps_train/policy_1_l": -219.3513641357422, "logps_train/policy_1_w": -138.4727783203125, "logps_train/policy_2_2": -121.22512817382812, "logps_train/policy_2_w": -219.89239501953125, "logps_train/ref_1_2": -177.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.5526819229125977, "rewards_train/1-l": -2.7859175205230713, "rewards_train/1-w": 3.38446044921875, "rewards_train/2-2": 3.143990993499756, "rewards_train/2-w": -1.7825983762741089, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.170377969741821, "rewards_train/margins_1": 4.937142372131348, "rewards_train/margins_2": 4.926589369773865, "step": 513 }, { "epoch": 1.54, "logps_train/policy_1_2": -198.12057495117188, "logps_train/policy_1_l": -177.68807983398438, "logps_train/policy_1_w": -98.72337341308594, "logps_train/policy_2_2": -131.11614990234375, "logps_train/policy_2_w": -163.92588806152344, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -2.275338888168335, "rewards_train/1-l": -2.238924503326416, "rewards_train/1-w": 2.512038230895996, "rewards_train/2-2": 2.2794013023376465, "rewards_train/2-w": -1.7070424556732178, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.750962734222412, "rewards_train/margins_1": 4.787377119064331, "rewards_train/margins_2": 3.9864437580108643, "step": 513 }, { "epoch": 1.54, "learning_rate": 7.062318573891716e-07, "loss": 0.4757, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -186.49755859375, "logps_train/policy_1_l": -157.81146240234375, "logps_train/policy_1_w": -102.86832427978516, "logps_train/policy_2_2": -116.41972351074219, "logps_train/policy_2_w": -160.8140411376953, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -1.2591310739517212, "rewards_train/1-l": -2.533538818359375, "rewards_train/1-w": 2.7106285095214844, "rewards_train/2-2": 3.343184471130371, "rewards_train/2-w": -1.085310935974121, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.244167327880859, "rewards_train/margins_1": 3.9697595834732056, "rewards_train/margins_2": 4.428495407104492, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -185.3196258544922, "logps_train/policy_1_l": -144.74786376953125, "logps_train/policy_1_w": -105.85978698730469, "logps_train/policy_2_2": -124.75825500488281, "logps_train/policy_2_w": -169.3419189453125, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.4661420583724976, "rewards_train/1-l": -1.5778130292892456, "rewards_train/1-w": 3.187849760055542, "rewards_train/2-2": 2.4017138481140137, "rewards_train/2-w": -1.1994260549545288, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.765662789344788, "rewards_train/margins_1": 4.6539918184280396, "rewards_train/margins_2": 3.6011399030685425, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -201.27108764648438, "logps_train/policy_1_l": -131.5332794189453, "logps_train/policy_1_w": -122.42135620117188, "logps_train/policy_2_2": -119.25430297851562, "logps_train/policy_2_w": -217.14773559570312, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -2.792343854904175, "rewards_train/1-l": -1.4266307353973389, "rewards_train/1-w": 3.22925066947937, "rewards_train/2-2": 2.905428647994995, "rewards_train/2-w": -3.197195053100586, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.655881404876709, "rewards_train/margins_1": 6.021594524383545, "rewards_train/margins_2": 6.102623701095581, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -128.366943359375, "logps_train/policy_1_l": -111.15763092041016, "logps_train/policy_1_w": -79.95315551757812, "logps_train/policy_2_2": -81.9337158203125, "logps_train/policy_2_w": -126.31039428710938, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -0.3238036036491394, "rewards_train/1-l": -1.332730770111084, "rewards_train/1-w": 3.1900360584259033, "rewards_train/2-2": 2.79998779296875, "rewards_train/2-w": -0.04275795817375183, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.522766828536987, "rewards_train/margins_1": 3.5138396620750427, "rewards_train/margins_2": 2.842745751142502, "step": 514 }, { "epoch": 1.54, "logps_train/policy_1_2": -186.44845581054688, "logps_train/policy_1_l": -172.22381591796875, "logps_train/policy_1_w": -102.27328491210938, "logps_train/policy_2_2": -118.88861083984375, "logps_train/policy_2_w": -156.15309143066406, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.6764851808547974, "rewards_train/1-l": -2.4657416343688965, "rewards_train/1-w": 2.2736001014709473, "rewards_train/2-2": 2.8720765113830566, "rewards_train/2-w": -1.3218028545379639, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.739341735839844, "rewards_train/margins_1": 3.9500852823257446, "rewards_train/margins_2": 4.1938793659210205, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -243.1448974609375, "logps_train/policy_1_l": -156.72491455078125, "logps_train/policy_1_w": -134.59634399414062, "logps_train/policy_2_2": -144.478515625, "logps_train/policy_2_w": -233.30714416503906, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -3.055115222930908, "rewards_train/1-l": -2.1047186851501465, "rewards_train/1-w": 3.3979828357696533, "rewards_train/2-2": 3.770899772644043, "rewards_train/2-w": -2.7822773456573486, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.5027015209198, "rewards_train/margins_1": 6.4530980587005615, "rewards_train/margins_2": 6.553177118301392, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -185.13726806640625, "logps_train/policy_1_l": -160.3070526123047, "logps_train/policy_1_w": -98.49143981933594, "logps_train/policy_2_2": -107.46153259277344, "logps_train/policy_2_w": -166.56106567382812, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -2.04575777053833, "rewards_train/1-l": -2.0147812366485596, "rewards_train/1-w": 2.8762459754943848, "rewards_train/2-2": 2.8335347175598145, "rewards_train/2-w": -1.8990745544433594, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.891027212142944, "rewards_train/margins_1": 4.922003746032715, "rewards_train/margins_2": 4.732609272003174, "step": 515 }, { "epoch": 1.54, "logps_train/policy_1_2": -209.84400939941406, "logps_train/policy_1_l": -185.1787567138672, "logps_train/policy_1_w": -130.63848876953125, "logps_train/policy_2_2": -145.17022705078125, "logps_train/policy_2_w": -203.43984985351562, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.282448410987854, "rewards_train/1-l": -2.412473440170288, "rewards_train/1-w": 3.392595052719116, "rewards_train/2-2": 3.0970401763916016, "rewards_train/2-w": -1.587344765663147, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.805068492889404, "rewards_train/margins_1": 4.67504346370697, "rewards_train/margins_2": 4.6843849420547485, "step": 515 }, { "epoch": 1.54, "learning_rate": 6.89116215590693e-07, "loss": 0.5618, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -196.91477966308594, "logps_train/policy_1_l": -236.11874389648438, "logps_train/policy_1_w": -133.4747314453125, "logps_train/policy_2_2": -130.313720703125, "logps_train/policy_2_w": -211.10223388671875, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.3440165519714355, "rewards_train/1-l": -3.075936794281006, "rewards_train/1-w": 3.111022472381592, "rewards_train/2-2": 2.8670647144317627, "rewards_train/2-w": -1.4334652423858643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.186959266662598, "rewards_train/margins_1": 4.455039024353027, "rewards_train/margins_2": 4.300529956817627, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -153.68692016601562, "logps_train/policy_1_l": -109.08898162841797, "logps_train/policy_1_w": -101.46873474121094, "logps_train/policy_2_2": -103.55866241455078, "logps_train/policy_2_w": -153.30300903320312, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -88.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.1100986003875732, "rewards_train/1-l": -2.055504322052002, "rewards_train/1-w": 3.036329984664917, "rewards_train/2-2": 2.3493094444274902, "rewards_train/2-w": -0.5513952374458313, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.091834306716919, "rewards_train/margins_1": 4.14642858505249, "rewards_train/margins_2": 2.9007046818733215, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -211.46954345703125, "logps_train/policy_1_l": -244.23924255371094, "logps_train/policy_1_w": -137.45095825195312, "logps_train/policy_2_2": -134.04489135742188, "logps_train/policy_2_w": -222.51885986328125, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -207.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.5930489301681519, "rewards_train/1-l": -3.6832990646362305, "rewards_train/1-w": 3.3959197998046875, "rewards_train/2-2": 3.4048850536346436, "rewards_train/2-w": -1.7093069553375244, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 7.079218864440918, "rewards_train/margins_1": 4.988968729972839, "rewards_train/margins_2": 5.114192008972168, "step": 516 }, { "epoch": 1.54, "logps_train/policy_1_2": -226.9698486328125, "logps_train/policy_1_l": -210.63653564453125, "logps_train/policy_1_w": -119.08587646484375, "logps_train/policy_2_2": -147.53981018066406, "logps_train/policy_2_w": -201.34768676757812, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.496496319770813, "rewards_train/1-l": -1.7070142030715942, "rewards_train/1-w": 3.030670166015625, "rewards_train/2-2": 3.5209217071533203, "rewards_train/2-w": -2.4324254989624023, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.737684369087219, "rewards_train/margins_1": 4.527166485786438, "rewards_train/margins_2": 5.953347206115723, "step": 516 }, { "epoch": 1.55, "logps_train/policy_1_2": -172.6570587158203, "logps_train/policy_1_l": -117.58319091796875, "logps_train/policy_1_w": -96.24081420898438, "logps_train/policy_2_2": -103.65351867675781, "logps_train/policy_2_w": -172.3195343017578, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -2.2113113403320312, "rewards_train/1-l": -1.7434747219085693, "rewards_train/1-w": 2.7259182929992676, "rewards_train/2-2": 2.373906373977661, "rewards_train/2-w": -2.453242540359497, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.469393014907837, "rewards_train/margins_1": 4.937229633331299, "rewards_train/margins_2": 4.827148914337158, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -193.00863647460938, "logps_train/policy_1_l": -230.61502075195312, "logps_train/policy_1_w": -123.27234649658203, "logps_train/policy_2_2": -123.26551055908203, "logps_train/policy_2_w": -193.8841552734375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -201.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.7676594257354736, "rewards_train/1-l": -2.9249796867370605, "rewards_train/1-w": 2.8053829669952393, "rewards_train/2-2": 2.7642695903778076, "rewards_train/2-w": -1.576696753501892, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.7303626537323, "rewards_train/margins_1": 4.573042392730713, "rewards_train/margins_2": 4.3409663438797, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -247.8461151123047, "logps_train/policy_1_l": -194.46865844726562, "logps_train/policy_1_w": -137.6522979736328, "logps_train/policy_2_2": -164.91612243652344, "logps_train/policy_2_w": -211.29922485351562, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.767033338546753, "rewards_train/1-l": -2.386537790298462, "rewards_train/1-w": 3.617192268371582, "rewards_train/2-2": 3.7224512100219727, "rewards_train/2-w": -1.401017189025879, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.003730058670044, "rewards_train/margins_1": 5.384225606918335, "rewards_train/margins_2": 5.123468399047852, "step": 517 }, { "epoch": 1.55, "logps_train/policy_1_2": -156.5227508544922, "logps_train/policy_1_l": -140.14747619628906, "logps_train/policy_1_w": -132.60704040527344, "logps_train/policy_2_2": -104.522216796875, "logps_train/policy_2_w": -210.6446533203125, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -120.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -0.9366494417190552, "rewards_train/1-l": -1.9952776432037354, "rewards_train/1-w": 3.3242568969726562, "rewards_train/2-2": 2.402466058731079, "rewards_train/2-w": -1.7082152366638184, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.319534540176392, "rewards_train/margins_1": 4.260906338691711, "rewards_train/margins_2": 4.1106812953948975, "step": 517 }, { "epoch": 1.55, "learning_rate": 6.721773128571812e-07, "loss": 0.551, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -195.4637451171875, "logps_train/policy_1_l": -193.91123962402344, "logps_train/policy_1_w": -124.7506103515625, "logps_train/policy_2_2": -119.26455688476562, "logps_train/policy_2_w": -194.45828247070312, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -2.19696044921875, "rewards_train/1-l": -2.531358480453491, "rewards_train/1-w": 3.0499205589294434, "rewards_train/2-2": 3.266122341156006, "rewards_train/2-w": -1.9229772090911865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.581279039382935, "rewards_train/margins_1": 5.246881008148193, "rewards_train/margins_2": 5.189099550247192, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -195.41543579101562, "logps_train/policy_1_l": -144.52200317382812, "logps_train/policy_1_w": -116.20381927490234, "logps_train/policy_2_2": -116.77006530761719, "logps_train/policy_2_w": -192.89712524414062, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -2.0212314128875732, "rewards_train/1-l": -2.3089141845703125, "rewards_train/1-w": 2.963308811187744, "rewards_train/2-2": 3.2435004711151123, "rewards_train/2-w": -2.2928366661071777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.272222995758057, "rewards_train/margins_1": 4.984540224075317, "rewards_train/margins_2": 5.53633713722229, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -198.25845336914062, "logps_train/policy_1_l": -175.37930297851562, "logps_train/policy_1_w": -109.22065734863281, "logps_train/policy_2_2": -127.86982727050781, "logps_train/policy_2_w": -168.49998474121094, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.3832685947418213, "rewards_train/1-l": -2.2744059562683105, "rewards_train/1-w": 3.0181679725646973, "rewards_train/2-2": 3.134697198867798, "rewards_train/2-w": -0.7953110337257385, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.292573928833008, "rewards_train/margins_1": 4.4014365673065186, "rewards_train/margins_2": 3.9300082325935364, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -162.85565185546875, "logps_train/policy_1_l": -120.67233276367188, "logps_train/policy_1_w": -114.04200744628906, "logps_train/policy_2_2": -101.51762390136719, "logps_train/policy_2_w": -184.22006225585938, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.8484556674957275, "rewards_train/1-l": -1.902389407157898, "rewards_train/1-w": 2.6782209873199463, "rewards_train/2-2": 2.337495803833008, "rewards_train/2-w": -2.171225070953369, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.580610394477844, "rewards_train/margins_1": 4.526676654815674, "rewards_train/margins_2": 4.508720874786377, "step": 518 }, { "epoch": 1.55, "logps_train/policy_1_2": -221.68243408203125, "logps_train/policy_1_l": -211.818115234375, "logps_train/policy_1_w": -143.20361328125, "logps_train/policy_2_2": -153.47879028320312, "logps_train/policy_2_w": -218.486572265625, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -0.8866994380950928, "rewards_train/1-l": -3.079956531524658, "rewards_train/1-w": 3.793506383895874, "rewards_train/2-2": 3.792356014251709, "rewards_train/2-w": -1.2365481853485107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.873462915420532, "rewards_train/margins_1": 4.680205821990967, "rewards_train/margins_2": 5.02890419960022, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -133.8211669921875, "logps_train/policy_1_l": -182.69851684570312, "logps_train/policy_1_w": -120.43247985839844, "logps_train/policy_2_2": -93.60911560058594, "logps_train/policy_2_w": -169.0574951171875, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -0.19500744342803955, "rewards_train/1-l": -3.0752952098846436, "rewards_train/1-w": 3.3389790058135986, "rewards_train/2-2": 2.715651512145996, "rewards_train/2-w": -0.09481334686279297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.414274215698242, "rewards_train/margins_1": 3.533986449241638, "rewards_train/margins_2": 2.810464859008789, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -132.9285125732422, "logps_train/policy_1_l": -156.56256103515625, "logps_train/policy_1_w": -108.31832122802734, "logps_train/policy_2_2": -78.511474609375, "logps_train/policy_2_w": -190.90847778320312, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -101.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.1350382566452026, "rewards_train/1-l": -2.129009962081909, "rewards_train/1-w": 3.493558406829834, "rewards_train/2-2": 2.2666258811950684, "rewards_train/2-w": -1.9892841577529907, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.622568368911743, "rewards_train/margins_1": 4.628596663475037, "rewards_train/margins_2": 4.255910038948059, "step": 519 }, { "epoch": 1.55, "logps_train/policy_1_2": -285.5963134765625, "logps_train/policy_1_l": -260.30303955078125, "logps_train/policy_1_w": -126.76123809814453, "logps_train/policy_2_2": -187.4591522216797, "logps_train/policy_2_w": -198.42172241210938, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -232.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -2.220571517944336, "rewards_train/1-l": -2.786357879638672, "rewards_train/1-w": 3.213696002960205, "rewards_train/2-2": 4.475958824157715, "rewards_train/2-w": -0.8817224502563477, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.000053882598877, "rewards_train/margins_1": 5.434267520904541, "rewards_train/margins_2": 5.3576812744140625, "step": 519 }, { "epoch": 1.56, "learning_rate": 6.554168023956817e-07, "loss": 0.5864, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -191.3901824951172, "logps_train/policy_1_l": -124.23770904541016, "logps_train/policy_1_w": -110.04825592041016, "logps_train/policy_2_2": -124.79632568359375, "logps_train/policy_2_w": -166.36343383789062, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.3835487365722656, "rewards_train/1-l": -1.417227864265442, "rewards_train/1-w": 2.9627525806427, "rewards_train/2-2": 3.0578670501708984, "rewards_train/2-w": -1.0422033071517944, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.379980444908142, "rewards_train/margins_1": 4.346301317214966, "rewards_train/margins_2": 4.100070357322693, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -132.6930694580078, "logps_train/policy_1_l": -83.81503295898438, "logps_train/policy_1_w": -76.09142303466797, "logps_train/policy_2_2": -83.8563232421875, "logps_train/policy_2_w": -119.8364486694336, "logps_train/ref_1_2": -123.5, "logps_train/ref_1_l": -67.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": -0.9001173973083496, "rewards_train/1-l": -1.6857025623321533, "rewards_train/1-w": 2.880213499069214, "rewards_train/2-2": 2.63800048828125, "rewards_train/2-w": -0.1395045518875122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.565916061401367, "rewards_train/margins_1": 3.7803308963775635, "rewards_train/margins_2": 2.777505040168762, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -157.64395141601562, "logps_train/policy_1_l": -113.61296081542969, "logps_train/policy_1_w": -98.46333312988281, "logps_train/policy_2_2": -101.83001708984375, "logps_train/policy_2_w": -161.53955078125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.2386142015457153, "rewards_train/1-l": -2.0141286849975586, "rewards_train/1-w": 2.3966355323791504, "rewards_train/2-2": 2.5369200706481934, "rewards_train/2-w": -1.8367681503295898, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.410764217376709, "rewards_train/margins_1": 3.6352497339248657, "rewards_train/margins_2": 4.373688220977783, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -169.08023071289062, "logps_train/policy_1_l": -132.06582641601562, "logps_train/policy_1_w": -99.56756591796875, "logps_train/policy_2_2": -105.00379943847656, "logps_train/policy_2_w": -165.8431396484375, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.444350242614746, "rewards_train/1-l": -1.4580234289169312, "rewards_train/1-w": 2.6040592193603516, "rewards_train/2-2": 2.8566513061523438, "rewards_train/2-w": -1.7328492403030396, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.062082648277283, "rewards_train/margins_1": 4.048409461975098, "rewards_train/margins_2": 4.589500546455383, "step": 520 }, { "epoch": 1.56, "logps_train/policy_1_2": -129.66973876953125, "logps_train/policy_1_l": -154.3373565673828, "logps_train/policy_1_w": -133.00698852539062, "logps_train/policy_2_2": -81.56230163574219, "logps_train/policy_2_w": -204.49781799316406, "logps_train/ref_1_2": -116.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.3235218524932861, "rewards_train/1-l": -2.344581365585327, "rewards_train/1-w": 3.016488552093506, "rewards_train/2-2": 1.8743178844451904, "rewards_train/2-w": -2.2107200622558594, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.361069917678833, "rewards_train/margins_1": 4.340010404586792, "rewards_train/margins_2": 4.08503794670105, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -245.04861450195312, "logps_train/policy_1_l": -178.0756072998047, "logps_train/policy_1_w": -119.87736511230469, "logps_train/policy_2_2": -152.44296264648438, "logps_train/policy_2_w": -194.48202514648438, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -2.8349409103393555, "rewards_train/1-l": -1.8711835145950317, "rewards_train/1-w": 2.6798417568206787, "rewards_train/2-2": 3.3471102714538574, "rewards_train/2-w": -1.983748435974121, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.5510252714157104, "rewards_train/margins_1": 5.514782667160034, "rewards_train/margins_2": 5.3308587074279785, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -165.43408203125, "logps_train/policy_1_l": -134.6739044189453, "logps_train/policy_1_w": -76.58473205566406, "logps_train/policy_2_2": -97.88819885253906, "logps_train/policy_2_w": -142.33834838867188, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -2.0117664337158203, "rewards_train/1-l": -2.4506173133850098, "rewards_train/1-w": 2.542894124984741, "rewards_train/2-2": 2.5686020851135254, "rewards_train/2-w": -1.722505807876587, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.993511438369751, "rewards_train/margins_1": 4.5546605587005615, "rewards_train/margins_2": 4.291107892990112, "step": 521 }, { "epoch": 1.56, "logps_train/policy_1_2": -165.66436767578125, "logps_train/policy_1_l": -125.63121032714844, "logps_train/policy_1_w": -97.38713073730469, "logps_train/policy_2_2": -99.15227508544922, "logps_train/policy_2_w": -152.88864135742188, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -2.1683902740478516, "rewards_train/1-l": -1.6319198608398438, "rewards_train/1-w": 3.113240957260132, "rewards_train/2-2": 2.3296947479248047, "rewards_train/2-w": -0.3951139450073242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.745160818099976, "rewards_train/margins_1": 5.281631231307983, "rewards_train/margins_2": 2.724808692932129, "step": 521 }, { "epoch": 1.56, "learning_rate": 6.38836320002468e-07, "loss": 0.6582, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -213.13441467285156, "logps_train/policy_1_l": -205.9207763671875, "logps_train/policy_1_w": -137.87554931640625, "logps_train/policy_2_2": -141.83766174316406, "logps_train/policy_2_w": -217.81939697265625, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.5607062578201294, "rewards_train/1-l": -2.501044750213623, "rewards_train/1-w": 3.41322660446167, "rewards_train/2-2": 2.794750213623047, "rewards_train/2-w": -1.7483454942703247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.914271354675293, "rewards_train/margins_1": 4.973932862281799, "rewards_train/margins_2": 4.543095707893372, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -238.5924530029297, "logps_train/policy_1_l": -144.14109802246094, "logps_train/policy_1_w": -110.37740325927734, "logps_train/policy_2_2": -163.10189819335938, "logps_train/policy_2_w": -166.99771118164062, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.704068899154663, "rewards_train/1-l": -1.838732123374939, "rewards_train/1-w": 2.952481746673584, "rewards_train/2-2": 3.4849281311035156, "rewards_train/2-w": -0.7984049916267395, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.791213870048523, "rewards_train/margins_1": 4.656550645828247, "rewards_train/margins_2": 4.283333122730255, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -195.33473205566406, "logps_train/policy_1_l": -164.99549865722656, "logps_train/policy_1_w": -139.13616943359375, "logps_train/policy_2_2": -127.84555053710938, "logps_train/policy_2_w": -225.18740844726562, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.4663015604019165, "rewards_train/1-l": -2.5444374084472656, "rewards_train/1-w": 3.2676315307617188, "rewards_train/2-2": 2.893857002258301, "rewards_train/2-w": -2.5603415966033936, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.812068939208984, "rewards_train/margins_1": 4.733933091163635, "rewards_train/margins_2": 5.454198598861694, "step": 522 }, { "epoch": 1.56, "logps_train/policy_1_2": -242.31097412109375, "logps_train/policy_1_l": -201.3106231689453, "logps_train/policy_1_w": -124.23924255371094, "logps_train/policy_2_2": -158.7550811767578, "logps_train/policy_2_w": -195.84295654296875, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -2.6577072143554688, "rewards_train/1-l": -2.2204174995422363, "rewards_train/1-w": 3.4538097381591797, "rewards_train/2-2": 3.325273275375366, "rewards_train/2-w": -1.2245323657989502, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.674227237701416, "rewards_train/margins_1": 6.111516952514648, "rewards_train/margins_2": 4.549805641174316, "step": 522 }, { "epoch": 1.57, "logps_train/policy_1_2": -221.62310791015625, "logps_train/policy_1_l": -180.64541625976562, "logps_train/policy_1_w": -103.18048858642578, "logps_train/policy_2_2": -143.1473388671875, "logps_train/policy_2_w": -168.06541442871094, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -2.206451892852783, "rewards_train/1-l": -2.479580879211426, "rewards_train/1-w": 2.663201093673706, "rewards_train/2-2": 2.8110480308532715, "rewards_train/2-w": -1.3670883178710938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.142781972885132, "rewards_train/margins_1": 4.869652986526489, "rewards_train/margins_2": 4.178136348724365, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -201.75930786132812, "logps_train/policy_1_l": -186.7125244140625, "logps_train/policy_1_w": -110.23067474365234, "logps_train/policy_2_2": -130.71682739257812, "logps_train/policy_2_w": -183.45736694335938, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -2.161868095397949, "rewards_train/1-l": -2.2509403228759766, "rewards_train/1-w": 3.0612101554870605, "rewards_train/2-2": 2.4064416885375977, "rewards_train/2-w": -1.5969080924987793, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.312150478363037, "rewards_train/margins_1": 5.22307825088501, "rewards_train/margins_2": 4.003349781036377, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -212.13754272460938, "logps_train/policy_1_l": -206.5596923828125, "logps_train/policy_1_w": -113.83441162109375, "logps_train/policy_2_2": -121.61436462402344, "logps_train/policy_2_w": -195.2369384765625, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -2.302426815032959, "rewards_train/1-l": -2.9373044967651367, "rewards_train/1-w": 3.255621910095215, "rewards_train/2-2": 3.4342660903930664, "rewards_train/2-w": -1.8135370016098022, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.192926406860352, "rewards_train/margins_1": 5.558048725128174, "rewards_train/margins_2": 5.247803092002869, "step": 523 }, { "epoch": 1.57, "logps_train/policy_1_2": -166.44784545898438, "logps_train/policy_1_l": -151.8199462890625, "logps_train/policy_1_w": -96.74057006835938, "logps_train/policy_2_2": -94.42584991455078, "logps_train/policy_2_w": -176.02108764648438, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -132.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.7580654621124268, "rewards_train/1-l": -1.986291766166687, "rewards_train/1-w": 3.0603179931640625, "rewards_train/2-2": 2.5472588539123535, "rewards_train/2-w": -2.0087482929229736, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.0466097593307495, "rewards_train/margins_1": 4.818383455276489, "rewards_train/margins_2": 4.556007146835327, "step": 523 }, { "epoch": 1.57, "learning_rate": 6.224374839033928e-07, "loss": 0.5207, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -185.36273193359375, "logps_train/policy_1_l": -168.8858184814453, "logps_train/policy_1_w": -103.89765930175781, "logps_train/policy_2_2": -126.60446166992188, "logps_train/policy_2_w": -170.0830078125, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.5343189239501953, "rewards_train/1-l": -2.2925381660461426, "rewards_train/1-w": 3.161015033721924, "rewards_train/2-2": 2.572366714477539, "rewards_train/2-w": -1.2223632335662842, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.453553199768066, "rewards_train/margins_1": 4.695333957672119, "rewards_train/margins_2": 3.7947299480438232, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -254.43423461914062, "logps_train/policy_1_l": -260.4306640625, "logps_train/policy_1_w": -179.30490112304688, "logps_train/policy_2_2": -168.9720001220703, "logps_train/policy_2_w": -270.69244384765625, "logps_train/ref_1_2": -231.0, "logps_train/ref_1_l": -224.0, "logps_train/ref_1_w": -218.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -251.0, "rewards_train/1-2": -2.3449864387512207, "rewards_train/1-l": -3.6946280002593994, "rewards_train/1-w": 3.8511502742767334, "rewards_train/2-2": 3.4735021591186523, "rewards_train/2-w": -1.9743242263793945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.545778274536133, "rewards_train/margins_1": 6.196136713027954, "rewards_train/margins_2": 5.447826385498047, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -197.4527587890625, "logps_train/policy_1_l": -156.90701293945312, "logps_train/policy_1_w": -103.29523468017578, "logps_train/policy_2_2": -127.89854431152344, "logps_train/policy_2_w": -156.73788452148438, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -1.8812134265899658, "rewards_train/1-l": -2.2323508262634277, "rewards_train/1-w": 2.791179656982422, "rewards_train/2-2": 2.8896377086639404, "rewards_train/2-w": -0.7542574405670166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.02353048324585, "rewards_train/margins_1": 4.672393083572388, "rewards_train/margins_2": 3.643895149230957, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -137.15350341796875, "logps_train/policy_1_l": -122.82032012939453, "logps_train/policy_1_w": -103.52788543701172, "logps_train/policy_2_2": -82.61079406738281, "logps_train/policy_2_w": -164.48715209960938, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -106.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -1.2553410530090332, "rewards_train/1-l": -1.6291513442993164, "rewards_train/1-w": 2.815277576446533, "rewards_train/2-2": 2.3524465560913086, "rewards_train/2-w": -1.3854339122772217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.44442892074585, "rewards_train/margins_1": 4.070618629455566, "rewards_train/margins_2": 3.7378804683685303, "step": 524 }, { "epoch": 1.57, "logps_train/policy_1_2": -124.056396484375, "logps_train/policy_1_l": -106.06025695800781, "logps_train/policy_1_w": -61.68740463256836, "logps_train/policy_2_2": -69.67547607421875, "logps_train/policy_2_w": -118.54295349121094, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -79.5, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -99.0, "rewards_train/1-2": -1.278686761856079, "rewards_train/1-l": -2.0829787254333496, "rewards_train/1-w": 1.8033297061920166, "rewards_train/2-2": 2.254035234451294, "rewards_train/2-w": -1.9770491123199463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.886308431625366, "rewards_train/margins_1": 3.0820164680480957, "rewards_train/margins_2": 4.23108434677124, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -197.12136840820312, "logps_train/policy_1_l": -208.23602294921875, "logps_train/policy_1_w": -113.24874877929688, "logps_train/policy_2_2": -136.61019897460938, "logps_train/policy_2_w": -174.13665771484375, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.0676254034042358, "rewards_train/1-l": -2.6958608627319336, "rewards_train/1-w": 3.232065200805664, "rewards_train/2-2": 3.066030979156494, "rewards_train/2-w": -0.8253364562988281, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.927926063537598, "rewards_train/margins_1": 4.2996906042099, "rewards_train/margins_2": 3.8913674354553223, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -160.1607208251953, "logps_train/policy_1_l": -133.1463623046875, "logps_train/policy_1_w": -102.17121124267578, "logps_train/policy_2_2": -99.52391052246094, "logps_train/policy_2_w": -169.49514770507812, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -1.758259654045105, "rewards_train/1-l": -2.5518925189971924, "rewards_train/1-w": 3.1516289710998535, "rewards_train/2-2": 2.2917487621307373, "rewards_train/2-w": -1.4647501707077026, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.703521490097046, "rewards_train/margins_1": 4.9098886251449585, "rewards_train/margins_2": 3.75649893283844, "step": 525 }, { "epoch": 1.57, "logps_train/policy_1_2": -158.4727020263672, "logps_train/policy_1_l": -167.06402587890625, "logps_train/policy_1_w": -106.93714141845703, "logps_train/policy_2_2": -98.99652099609375, "logps_train/policy_2_w": -178.30615234375, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.6714890003204346, "rewards_train/1-l": -2.520512819290161, "rewards_train/1-w": 2.9987664222717285, "rewards_train/2-2": 2.4601125717163086, "rewards_train/2-w": -1.8964838981628418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.51927924156189, "rewards_train/margins_1": 4.670255422592163, "rewards_train/margins_2": 4.35659646987915, "step": 525 }, { "epoch": 1.57, "learning_rate": 6.062218945959497e-07, "loss": 0.6493, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -156.9705352783203, "logps_train/policy_1_l": -141.46241760253906, "logps_train/policy_1_w": -98.88370513916016, "logps_train/policy_2_2": -101.38339233398438, "logps_train/policy_2_w": -163.8416748046875, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -1.6751787662506104, "rewards_train/1-l": -2.8460464477539062, "rewards_train/1-w": 3.1303796768188477, "rewards_train/2-2": 2.443692207336426, "rewards_train/2-w": -1.4083868265151978, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.976426124572754, "rewards_train/margins_1": 4.805558443069458, "rewards_train/margins_2": 3.8520790338516235, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -257.8953857421875, "logps_train/policy_1_l": -212.5670166015625, "logps_train/policy_1_w": -155.16493225097656, "logps_train/policy_2_2": -174.54766845703125, "logps_train/policy_2_w": -252.76220703125, "logps_train/ref_1_2": -235.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -208.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -2.301254987716675, "rewards_train/1-l": -2.2957029342651367, "rewards_train/1-w": 4.06358528137207, "rewards_train/2-2": 3.2725775241851807, "rewards_train/2-w": -2.375537157058716, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.359288215637207, "rewards_train/margins_1": 6.364840269088745, "rewards_train/margins_2": 5.6481146812438965, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -224.15615844726562, "logps_train/policy_1_l": -210.60006713867188, "logps_train/policy_1_w": -135.53668212890625, "logps_train/policy_2_2": -148.38653564453125, "logps_train/policy_2_w": -193.7984619140625, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.8652241230010986, "rewards_train/1-l": -2.4359829425811768, "rewards_train/1-w": 3.097111701965332, "rewards_train/2-2": 3.2271676063537598, "rewards_train/2-w": -0.7661744356155396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.533094644546509, "rewards_train/margins_1": 4.962335824966431, "rewards_train/margins_2": 3.9933420419692993, "step": 526 }, { "epoch": 1.57, "logps_train/policy_1_2": -226.43692016601562, "logps_train/policy_1_l": -212.96939086914062, "logps_train/policy_1_w": -91.64785766601562, "logps_train/policy_2_2": -138.28860473632812, "logps_train/policy_2_w": -149.5106201171875, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -2.6175196170806885, "rewards_train/1-l": -2.799870252609253, "rewards_train/1-w": 2.26910138130188, "rewards_train/2-2": 2.8871545791625977, "rewards_train/2-w": -1.3284066915512085, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.068971633911133, "rewards_train/margins_1": 4.886620998382568, "rewards_train/margins_2": 4.215561270713806, "step": 526 }, { "epoch": 1.58, "logps_train/policy_1_2": -247.1062774658203, "logps_train/policy_1_l": -215.01486206054688, "logps_train/policy_1_w": -156.64718627929688, "logps_train/policy_2_2": -157.6533203125, "logps_train/policy_2_w": -239.66888427734375, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": -2.4285976886749268, "rewards_train/1-l": -2.851095199584961, "rewards_train/1-w": 3.407156467437744, "rewards_train/2-2": 3.3588879108428955, "rewards_train/2-w": -1.6629819869995117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.258251667022705, "rewards_train/margins_1": 5.835754156112671, "rewards_train/margins_2": 5.021869897842407, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -184.43759155273438, "logps_train/policy_1_l": -219.7931671142578, "logps_train/policy_1_w": -149.34506225585938, "logps_train/policy_2_2": -131.22862243652344, "logps_train/policy_2_w": -241.97470092773438, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": -1.1903159618377686, "rewards_train/1-l": -2.4207963943481445, "rewards_train/1-w": 3.2713541984558105, "rewards_train/2-2": 2.589686870574951, "rewards_train/2-w": -2.6670002937316895, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.692150592803955, "rewards_train/margins_1": 4.461670160293579, "rewards_train/margins_2": 5.256687164306641, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -172.44241333007812, "logps_train/policy_1_l": -209.51220703125, "logps_train/policy_1_w": -121.76231384277344, "logps_train/policy_2_2": -111.40296936035156, "logps_train/policy_2_w": -188.13467407226562, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.6993179321289062, "rewards_train/1-l": -3.018311023712158, "rewards_train/1-w": 2.739199161529541, "rewards_train/2-2": 2.351181983947754, "rewards_train/2-w": -1.3033113479614258, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.757510185241699, "rewards_train/margins_1": 4.438517093658447, "rewards_train/margins_2": 3.6544933319091797, "step": 527 }, { "epoch": 1.58, "logps_train/policy_1_2": -127.66641235351562, "logps_train/policy_1_l": -92.37796783447266, "logps_train/policy_1_w": -68.64253234863281, "logps_train/policy_2_2": -84.00279998779297, "logps_train/policy_2_w": -124.39839935302734, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -79.5, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -111.5, "rewards_train/1-2": -0.7986726760864258, "rewards_train/1-l": -1.2911174297332764, "rewards_train/1-w": 2.520512580871582, "rewards_train/2-2": 2.3313608169555664, "rewards_train/2-w": -1.285738468170166, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8116300106048584, "rewards_train/margins_1": 3.319185256958008, "rewards_train/margins_2": 3.6170992851257324, "step": 527 }, { "epoch": 1.58, "learning_rate": 5.901911346930688e-07, "loss": 0.4662, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -143.68690490722656, "logps_train/policy_1_l": -115.84056091308594, "logps_train/policy_1_w": -108.18172454833984, "logps_train/policy_2_2": -85.69328308105469, "logps_train/policy_2_w": -169.39810180664062, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.1382217407226562, "rewards_train/1-l": -1.785033106803894, "rewards_train/1-w": 3.136514902114868, "rewards_train/2-2": 2.751570224761963, "rewards_train/2-w": -1.0116854906082153, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.921548008918762, "rewards_train/margins_1": 4.274736642837524, "rewards_train/margins_2": 3.7632557153701782, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -208.01651000976562, "logps_train/policy_1_l": -210.4305419921875, "logps_train/policy_1_w": -166.97445678710938, "logps_train/policy_2_2": -127.14898681640625, "logps_train/policy_2_w": -273.032470703125, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": -2.238760471343994, "rewards_train/1-l": -2.67181396484375, "rewards_train/1-w": 3.9763693809509277, "rewards_train/2-2": 2.805023193359375, "rewards_train/2-w": -3.215209722518921, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.648183345794678, "rewards_train/margins_1": 6.215129852294922, "rewards_train/margins_2": 6.020232915878296, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -166.10316467285156, "logps_train/policy_1_l": -145.24472045898438, "logps_train/policy_1_w": -118.8460693359375, "logps_train/policy_2_2": -101.72412872314453, "logps_train/policy_2_w": -194.14596557617188, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.9445457458496094, "rewards_train/1-l": -1.9763764142990112, "rewards_train/1-w": 3.15289306640625, "rewards_train/2-2": 2.38832950592041, "rewards_train/2-w": -1.9364718198776245, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.129269480705261, "rewards_train/margins_1": 5.097438812255859, "rewards_train/margins_2": 4.324801325798035, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -166.55166625976562, "logps_train/policy_1_l": -193.34437561035156, "logps_train/policy_1_w": -101.70071411132812, "logps_train/policy_2_2": -107.54396057128906, "logps_train/policy_2_w": -180.62945556640625, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.7145406007766724, "rewards_train/1-l": -2.4474270343780518, "rewards_train/1-w": 3.435007095336914, "rewards_train/2-2": 2.285642623901367, "rewards_train/2-w": -2.1629457473754883, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.882434129714966, "rewards_train/margins_1": 5.149547696113586, "rewards_train/margins_2": 4.4485883712768555, "step": 528 }, { "epoch": 1.58, "logps_train/policy_1_2": -154.61489868164062, "logps_train/policy_1_l": -123.6415023803711, "logps_train/policy_1_w": -95.57763671875, "logps_train/policy_2_2": -91.18480682373047, "logps_train/policy_2_w": -167.6063690185547, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.9497721195220947, "rewards_train/1-l": -2.427431583404541, "rewards_train/1-w": 2.700928211212158, "rewards_train/2-2": 2.3694100379943848, "rewards_train/2-w": -1.9584887027740479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.128359794616699, "rewards_train/margins_1": 4.650700330734253, "rewards_train/margins_2": 4.327898740768433, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -144.05838012695312, "logps_train/policy_1_l": -156.86769104003906, "logps_train/policy_1_w": -115.49222564697266, "logps_train/policy_2_2": -95.80258178710938, "logps_train/policy_2_w": -180.516357421875, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -0.8527125120162964, "rewards_train/1-l": -2.359633207321167, "rewards_train/1-w": 2.66093373298645, "rewards_train/2-2": 2.2556796073913574, "rewards_train/2-w": -1.818433403968811, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.020566940307617, "rewards_train/margins_1": 3.5136462450027466, "rewards_train/margins_2": 4.0741130113601685, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -205.09625244140625, "logps_train/policy_1_l": -211.48121643066406, "logps_train/policy_1_w": -111.69835662841797, "logps_train/policy_2_2": -137.56179809570312, "logps_train/policy_2_w": -170.01882934570312, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.7994675636291504, "rewards_train/1-l": -3.216383695602417, "rewards_train/1-w": 2.794031858444214, "rewards_train/2-2": 2.896554470062256, "rewards_train/2-w": -1.100319743156433, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.010415554046631, "rewards_train/margins_1": 4.593499422073364, "rewards_train/margins_2": 3.996874213218689, "step": 529 }, { "epoch": 1.58, "logps_train/policy_1_2": -159.53219604492188, "logps_train/policy_1_l": -146.17698669433594, "logps_train/policy_1_w": -124.64920043945312, "logps_train/policy_2_2": -107.5166015625, "logps_train/policy_2_w": -175.8044891357422, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.023531436920166, "rewards_train/1-l": -2.2920145988464355, "rewards_train/1-w": 3.023751735687256, "rewards_train/2-2": 2.4631829261779785, "rewards_train/2-w": -0.49529144167900085, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.315766334533691, "rewards_train/margins_1": 4.047283172607422, "rewards_train/margins_2": 2.9584743678569794, "step": 529 }, { "epoch": 1.59, "learning_rate": 5.743467687686563e-07, "loss": 0.5318, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -161.63226318359375, "logps_train/policy_1_l": -176.60162353515625, "logps_train/policy_1_w": -106.74455261230469, "logps_train/policy_2_2": -106.91583251953125, "logps_train/policy_2_w": -180.04754638671875, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.0768970251083374, "rewards_train/1-l": -1.4697825908660889, "rewards_train/1-w": 3.3407058715820312, "rewards_train/2-2": 2.3826355934143066, "rewards_train/2-w": -1.3347837924957275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.81048846244812, "rewards_train/margins_1": 4.417602896690369, "rewards_train/margins_2": 3.717419385910034, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -169.9102783203125, "logps_train/policy_1_l": -144.102294921875, "logps_train/policy_1_w": -96.99933624267578, "logps_train/policy_2_2": -112.3465805053711, "logps_train/policy_2_w": -150.4539794921875, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -1.533606767654419, "rewards_train/1-l": -2.1045408248901367, "rewards_train/1-w": 2.2346365451812744, "rewards_train/2-2": 2.641904830932617, "rewards_train/2-w": -1.1825084686279297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.339177370071411, "rewards_train/margins_1": 3.7682433128356934, "rewards_train/margins_2": 3.824413299560547, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -159.3010711669922, "logps_train/policy_1_l": -137.4396514892578, "logps_train/policy_1_w": -92.9535903930664, "logps_train/policy_2_2": -90.37240600585938, "logps_train/policy_2_w": -164.29373168945312, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -113.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.7285451889038086, "rewards_train/1-l": -2.4659619331359863, "rewards_train/1-w": 2.481374740600586, "rewards_train/2-2": 2.5471348762512207, "rewards_train/2-w": -1.6841105222702026, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.947336673736572, "rewards_train/margins_1": 4.2099199295043945, "rewards_train/margins_2": 4.231245398521423, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -156.51651000976562, "logps_train/policy_1_l": -102.19998168945312, "logps_train/policy_1_w": -87.8038330078125, "logps_train/policy_2_2": -94.36264038085938, "logps_train/policy_2_w": -148.7711944580078, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -83.5, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -1.4145405292510986, "rewards_train/1-l": -1.8748807907104492, "rewards_train/1-w": 2.751648426055908, "rewards_train/2-2": 2.4826812744140625, "rewards_train/2-w": -1.3130568265914917, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.626529216766357, "rewards_train/margins_1": 4.166188955307007, "rewards_train/margins_2": 3.795738101005554, "step": 530 }, { "epoch": 1.59, "logps_train/policy_1_2": -187.6783447265625, "logps_train/policy_1_l": -155.62474060058594, "logps_train/policy_1_w": -113.3890380859375, "logps_train/policy_2_2": -122.15116882324219, "logps_train/policy_2_w": -172.64447021484375, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.4272089004516602, "rewards_train/1-l": -2.048997402191162, "rewards_train/1-w": 2.834728717803955, "rewards_train/2-2": 2.9229702949523926, "rewards_train/2-w": -1.2103450298309326, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.883726119995117, "rewards_train/margins_1": 4.261937618255615, "rewards_train/margins_2": 4.133315324783325, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -149.15957641601562, "logps_train/policy_1_l": -178.69065856933594, "logps_train/policy_1_w": -126.3057632446289, "logps_train/policy_2_2": -103.0853271484375, "logps_train/policy_2_w": -199.01348876953125, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -0.9579994082450867, "rewards_train/1-l": -2.181370735168457, "rewards_train/1-w": 3.143251419067383, "rewards_train/2-2": 1.983264684677124, "rewards_train/2-w": -1.8743946552276611, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.32462215423584, "rewards_train/margins_1": 4.1012508273124695, "rewards_train/margins_2": 3.857659339904785, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -215.47802734375, "logps_train/policy_1_l": -183.33657836914062, "logps_train/policy_1_w": -140.87026977539062, "logps_train/policy_2_2": -139.134765625, "logps_train/policy_2_w": -229.29519653320312, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -2.237255096435547, "rewards_train/1-l": -2.3649089336395264, "rewards_train/1-w": 3.394613027572632, "rewards_train/2-2": 2.8452141284942627, "rewards_train/2-w": -2.2283477783203125, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.759521961212158, "rewards_train/margins_1": 5.631868124008179, "rewards_train/margins_2": 5.073561906814575, "step": 531 }, { "epoch": 1.59, "logps_train/policy_1_2": -175.98199462890625, "logps_train/policy_1_l": -154.98561096191406, "logps_train/policy_1_w": -98.3948974609375, "logps_train/policy_2_2": -115.93658447265625, "logps_train/policy_2_w": -168.73760986328125, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -1.3083571195602417, "rewards_train/1-l": -2.0225841999053955, "rewards_train/1-w": 2.93316650390625, "rewards_train/2-2": 2.3932552337646484, "rewards_train/2-w": -1.6112608909606934, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.9557507038116455, "rewards_train/margins_1": 4.241523623466492, "rewards_train/margins_2": 4.004516124725342, "step": 531 }, { "epoch": 1.59, "learning_rate": 5.586903432048943e-07, "loss": 0.5946, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -186.6641845703125, "logps_train/policy_1_l": -144.43081665039062, "logps_train/policy_1_w": -106.87022399902344, "logps_train/policy_2_2": -124.87802124023438, "logps_train/policy_2_w": -164.3520965576172, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -122.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.487707257270813, "rewards_train/1-l": -2.1679835319519043, "rewards_train/1-w": 2.740419387817383, "rewards_train/2-2": 2.678506374359131, "rewards_train/2-w": -1.1281778812408447, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.908402919769287, "rewards_train/margins_1": 4.228126645088196, "rewards_train/margins_2": 3.8066842555999756, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -153.4702606201172, "logps_train/policy_1_l": -161.89834594726562, "logps_train/policy_1_w": -117.45757293701172, "logps_train/policy_2_2": -92.6136474609375, "logps_train/policy_2_w": -184.79403686523438, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.457963466644287, "rewards_train/1-l": -2.1135411262512207, "rewards_train/1-w": 2.65580415725708, "rewards_train/2-2": 2.4632441997528076, "rewards_train/2-w": -1.870027780532837, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.769345283508301, "rewards_train/margins_1": 4.113767623901367, "rewards_train/margins_2": 4.3332719802856445, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -207.49014282226562, "logps_train/policy_1_l": -235.90069580078125, "logps_train/policy_1_w": -144.40753173828125, "logps_train/policy_2_2": -137.82046508789062, "logps_train/policy_2_w": -229.4625244140625, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -1.3029193878173828, "rewards_train/1-l": -3.3132145404815674, "rewards_train/1-w": 3.7694029808044434, "rewards_train/2-2": 3.3287930488586426, "rewards_train/2-w": -1.7568007707595825, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 7.082617521286011, "rewards_train/margins_1": 5.072322368621826, "rewards_train/margins_2": 5.085593819618225, "step": 532 }, { "epoch": 1.59, "logps_train/policy_1_2": -167.65127563476562, "logps_train/policy_1_l": -178.79434204101562, "logps_train/policy_1_w": -115.900390625, "logps_train/policy_2_2": -100.85775756835938, "logps_train/policy_2_w": -193.33924865722656, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.6315336227416992, "rewards_train/1-l": -2.099550724029541, "rewards_train/1-w": 2.7115235328674316, "rewards_train/2-2": 2.8138339519500732, "rewards_train/2-w": -2.325331926345825, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.811074256896973, "rewards_train/margins_1": 4.343057155609131, "rewards_train/margins_2": 5.139165878295898, "step": 532 }, { "epoch": 1.6, "logps_train/policy_1_2": -144.31900024414062, "logps_train/policy_1_l": -141.3585662841797, "logps_train/policy_1_w": -103.38064575195312, "logps_train/policy_2_2": -88.43236541748047, "logps_train/policy_2_w": -157.75350952148438, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -115.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -0.8193991184234619, "rewards_train/1-l": -2.3178153038024902, "rewards_train/1-w": 2.870919704437256, "rewards_train/2-2": 2.6841073036193848, "rewards_train/2-w": -0.8316004276275635, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.188735008239746, "rewards_train/margins_1": 3.6903188228607178, "rewards_train/margins_2": 3.5157077312469482, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -125.15641784667969, "logps_train/policy_1_l": -200.38021850585938, "logps_train/policy_1_w": -68.54032135009766, "logps_train/policy_2_2": -79.16084289550781, "logps_train/policy_2_w": -126.27063751220703, "logps_train/ref_1_2": -115.5, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -92.0, "logps_train/ref_2_2": -100.5, "logps_train/ref_2_w": -113.0, "rewards_train/1-2": -0.9867352843284607, "rewards_train/1-l": -3.104036331176758, "rewards_train/1-w": 2.372725486755371, "rewards_train/2-2": 2.1190719604492188, "rewards_train/2-w": -1.3374154567718506, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.476761817932129, "rewards_train/margins_1": 3.359460771083832, "rewards_train/margins_2": 3.4564874172210693, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -185.47035217285156, "logps_train/policy_1_l": -148.96014404296875, "logps_train/policy_1_w": -90.92811584472656, "logps_train/policy_2_2": -112.11750793457031, "logps_train/policy_2_w": -166.9503173828125, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -2.3250632286071777, "rewards_train/1-l": -1.614471197128296, "rewards_train/1-w": 2.9264512062072754, "rewards_train/2-2": 2.5177412033081055, "rewards_train/2-w": -2.1342408657073975, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.540922403335571, "rewards_train/margins_1": 5.251514434814453, "rewards_train/margins_2": 4.651982069015503, "step": 533 }, { "epoch": 1.6, "logps_train/policy_1_2": -141.81228637695312, "logps_train/policy_1_l": -136.52581787109375, "logps_train/policy_1_w": -89.38066864013672, "logps_train/policy_2_2": -80.97523498535156, "logps_train/policy_2_w": -149.22605895996094, "logps_train/ref_1_2": -121.5, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -101.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -2.035917043685913, "rewards_train/1-l": -1.927533507347107, "rewards_train/1-w": 2.7152538299560547, "rewards_train/2-2": 2.0616557598114014, "rewards_train/2-w": -1.5198712348937988, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.642787337303162, "rewards_train/margins_1": 4.751170873641968, "rewards_train/margins_2": 3.5815269947052, "step": 533 }, { "epoch": 1.6, "learning_rate": 5.432233860413172e-07, "loss": 0.5437, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -206.93026733398438, "logps_train/policy_1_l": -140.6322021484375, "logps_train/policy_1_w": -137.6558380126953, "logps_train/policy_2_2": -133.85211181640625, "logps_train/policy_2_w": -209.36209106445312, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.9727139472961426, "rewards_train/1-l": -1.618492603302002, "rewards_train/1-w": 3.1393473148345947, "rewards_train/2-2": 2.8765082359313965, "rewards_train/2-w": -1.704664945602417, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.757839918136597, "rewards_train/margins_1": 5.112061262130737, "rewards_train/margins_2": 4.5811731815338135, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -213.2459716796875, "logps_train/policy_1_l": -228.92578125, "logps_train/policy_1_w": -120.0308837890625, "logps_train/policy_2_2": -136.64573669433594, "logps_train/policy_2_w": -206.41433715820312, "logps_train/ref_1_2": -195.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.8394417762756348, "rewards_train/1-l": -2.488670825958252, "rewards_train/1-w": 3.908435583114624, "rewards_train/2-2": 3.2291769981384277, "rewards_train/2-w": -1.6563745737075806, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.397106409072876, "rewards_train/margins_1": 5.747877359390259, "rewards_train/margins_2": 4.885551571846008, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -221.90423583984375, "logps_train/policy_1_l": -229.26651000976562, "logps_train/policy_1_w": -128.18106079101562, "logps_train/policy_2_2": -132.3907470703125, "logps_train/policy_2_w": -214.9727783203125, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -2.064642906188965, "rewards_train/1-l": -3.289271831512451, "rewards_train/1-w": 3.471738338470459, "rewards_train/2-2": 3.579773426055908, "rewards_train/2-w": -1.6004018783569336, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.76101016998291, "rewards_train/margins_1": 5.536381244659424, "rewards_train/margins_2": 5.180175304412842, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -152.51548767089844, "logps_train/policy_1_l": -154.7319793701172, "logps_train/policy_1_w": -72.762451171875, "logps_train/policy_2_2": -96.85151672363281, "logps_train/policy_2_w": -118.28313446044922, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -110.0, "rewards_train/1-2": -1.6335806846618652, "rewards_train/1-l": -2.3304495811462402, "rewards_train/1-w": 2.3110108375549316, "rewards_train/2-2": 2.3707072734832764, "rewards_train/2-w": -0.8150323033332825, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.641460418701172, "rewards_train/margins_1": 3.944591522216797, "rewards_train/margins_2": 3.185739576816559, "step": 534 }, { "epoch": 1.6, "logps_train/policy_1_2": -173.59707641601562, "logps_train/policy_1_l": -224.66973876953125, "logps_train/policy_1_w": -143.84300231933594, "logps_train/policy_2_2": -106.92725372314453, "logps_train/policy_2_w": -219.14083862304688, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -1.6610743999481201, "rewards_train/1-l": -3.125762462615967, "rewards_train/1-w": 3.606326103210449, "rewards_train/2-2": 2.3955564498901367, "rewards_train/2-w": -0.8660373091697693, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 6.732088565826416, "rewards_train/margins_1": 5.267400503158569, "rewards_train/margins_2": 3.261593759059906, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -190.69854736328125, "logps_train/policy_1_l": -194.70388793945312, "logps_train/policy_1_w": -125.48928833007812, "logps_train/policy_2_2": -127.60554504394531, "logps_train/policy_2_w": -202.20645141601562, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.0905593633651733, "rewards_train/1-l": -1.9199498891830444, "rewards_train/1-w": 3.570016860961914, "rewards_train/2-2": 2.9244067668914795, "rewards_train/2-w": -1.5741603374481201, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.4899667501449585, "rewards_train/margins_1": 4.660576224327087, "rewards_train/margins_2": 4.4985671043396, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -160.68739318847656, "logps_train/policy_1_l": -127.2118148803711, "logps_train/policy_1_w": -89.64623260498047, "logps_train/policy_2_2": -99.51142120361328, "logps_train/policy_2_w": -155.28579711914062, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.4628803730010986, "rewards_train/1-l": -2.040712356567383, "rewards_train/1-w": 2.746265172958374, "rewards_train/2-2": 2.7996392250061035, "rewards_train/2-w": -1.5553385019302368, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.786977529525757, "rewards_train/margins_1": 4.209145545959473, "rewards_train/margins_2": 4.35497772693634, "step": 535 }, { "epoch": 1.6, "logps_train/policy_1_2": -174.98513793945312, "logps_train/policy_1_l": -143.775634765625, "logps_train/policy_1_w": -158.80191040039062, "logps_train/policy_2_2": -116.26130676269531, "logps_train/policy_2_w": -238.16226196289062, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -1.1253693103790283, "rewards_train/1-l": -1.7825437784194946, "rewards_train/1-w": 3.403989315032959, "rewards_train/2-2": 2.7888102531433105, "rewards_train/2-w": -1.887514352798462, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.186533093452454, "rewards_train/margins_1": 4.529358625411987, "rewards_train/margins_2": 4.6763246059417725, "step": 535 }, { "epoch": 1.6, "learning_rate": 5.279474068256768e-07, "loss": 0.5759, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -159.08865356445312, "logps_train/policy_1_l": -130.01351928710938, "logps_train/policy_1_w": -74.96649169921875, "logps_train/policy_2_2": -109.14397430419922, "logps_train/policy_2_w": -117.60651397705078, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -100.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -0.6243914365768433, "rewards_train/1-l": -2.1372900009155273, "rewards_train/1-w": 2.4955379962921143, "rewards_train/2-2": 2.810016632080078, "rewards_train/2-w": -0.2649475932121277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.632827997207642, "rewards_train/margins_1": 3.1199294328689575, "rewards_train/margins_2": 3.074964225292206, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -137.26515197753906, "logps_train/policy_1_l": -142.12948608398438, "logps_train/policy_1_w": -103.32433319091797, "logps_train/policy_2_2": -87.11662292480469, "logps_train/policy_2_w": -152.90415954589844, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -110.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.7884294986724854, "rewards_train/1-l": -1.802987813949585, "rewards_train/1-w": 2.2264046669006348, "rewards_train/2-2": 2.287288188934326, "rewards_train/2-w": -1.1824085712432861, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.02939248085022, "rewards_train/margins_1": 3.01483416557312, "rewards_train/margins_2": 3.4696967601776123, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -215.80230712890625, "logps_train/policy_1_l": -193.73948669433594, "logps_train/policy_1_w": -119.64114379882812, "logps_train/policy_2_2": -141.14425659179688, "logps_train/policy_2_w": -198.24652099609375, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.960700273513794, "rewards_train/1-l": -2.0620343685150146, "rewards_train/1-w": 3.0611791610717773, "rewards_train/2-2": 3.0332322120666504, "rewards_train/2-w": -1.8644976615905762, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.123213529586792, "rewards_train/margins_1": 5.021879434585571, "rewards_train/margins_2": 4.897729873657227, "step": 536 }, { "epoch": 1.6, "logps_train/policy_1_2": -154.58023071289062, "logps_train/policy_1_l": -175.08416748046875, "logps_train/policy_1_w": -161.28973388671875, "logps_train/policy_2_2": -101.65855407714844, "logps_train/policy_2_w": -247.30087280273438, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -223.0, "rewards_train/1-2": -0.5923978686332703, "rewards_train/1-l": -1.824836015701294, "rewards_train/1-w": 3.562823534011841, "rewards_train/2-2": 2.5923476219177246, "rewards_train/2-w": -2.3632891178131104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.387659549713135, "rewards_train/margins_1": 4.155221402645111, "rewards_train/margins_2": 4.955636739730835, "step": 536 }, { "epoch": 1.61, "logps_train/policy_1_2": -210.26644897460938, "logps_train/policy_1_l": -205.7349853515625, "logps_train/policy_1_w": -155.312744140625, "logps_train/policy_2_2": -132.81756591796875, "logps_train/policy_2_w": -247.67198181152344, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -1.6696126461029053, "rewards_train/1-l": -1.7089976072311401, "rewards_train/1-w": 3.308177947998047, "rewards_train/2-2": 3.2553529739379883, "rewards_train/2-w": -2.6015734672546387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.017175555229187, "rewards_train/margins_1": 4.977790594100952, "rewards_train/margins_2": 5.856926441192627, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -158.51292419433594, "logps_train/policy_1_l": -184.39358520507812, "logps_train/policy_1_w": -111.79946899414062, "logps_train/policy_2_2": -93.88915252685547, "logps_train/policy_2_w": -186.88497924804688, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.4344959259033203, "rewards_train/1-l": -2.2371134757995605, "rewards_train/1-w": 2.863412380218506, "rewards_train/2-2": 2.533740997314453, "rewards_train/2-w": -1.910761833190918, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.100525856018066, "rewards_train/margins_1": 4.297908306121826, "rewards_train/margins_2": 4.444502830505371, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -215.97915649414062, "logps_train/policy_1_l": -171.91830444335938, "logps_train/policy_1_w": -139.6298828125, "logps_train/policy_2_2": -154.0203094482422, "logps_train/policy_2_w": -199.63999938964844, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.8736965656280518, "rewards_train/1-l": -2.3170742988586426, "rewards_train/1-w": 3.093822479248047, "rewards_train/2-2": 3.2815637588500977, "rewards_train/2-w": -0.7290143966674805, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.4108967781066895, "rewards_train/margins_1": 3.9675190448760986, "rewards_train/margins_2": 4.010578155517578, "step": 537 }, { "epoch": 1.61, "logps_train/policy_1_2": -149.7987060546875, "logps_train/policy_1_l": -174.13064575195312, "logps_train/policy_1_w": -107.2236328125, "logps_train/policy_2_2": -98.4913558959961, "logps_train/policy_2_w": -176.35987854003906, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -0.9385612607002258, "rewards_train/1-l": -3.285818576812744, "rewards_train/1-w": 3.2245116233825684, "rewards_train/2-2": 2.447739362716675, "rewards_train/2-w": -1.5840349197387695, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.5103302001953125, "rewards_train/margins_1": 4.163072884082794, "rewards_train/margins_2": 4.031774282455444, "step": 537 }, { "epoch": 1.61, "learning_rate": 5.128638964666166e-07, "loss": 0.5779, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -172.10459899902344, "logps_train/policy_1_l": -151.50210571289062, "logps_train/policy_1_w": -102.7660140991211, "logps_train/policy_2_2": -110.59069061279297, "logps_train/policy_2_w": -167.0972900390625, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -1.643272876739502, "rewards_train/1-l": -2.943960189819336, "rewards_train/1-w": 2.8173437118530273, "rewards_train/2-2": 2.7844858169555664, "rewards_train/2-w": -1.1728135347366333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.761303901672363, "rewards_train/margins_1": 4.460616588592529, "rewards_train/margins_2": 3.9572993516921997, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -162.82139587402344, "logps_train/policy_1_l": -111.33270263671875, "logps_train/policy_1_w": -104.67787170410156, "logps_train/policy_2_2": -113.33219909667969, "logps_train/policy_2_w": -161.15835571289062, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.7258411645889282, "rewards_train/1-l": -1.945379614830017, "rewards_train/1-w": 3.0910017490386963, "rewards_train/2-2": 2.3615550994873047, "rewards_train/2-w": -0.7353674173355103, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.036381363868713, "rewards_train/margins_1": 3.8168429136276245, "rewards_train/margins_2": 3.096922516822815, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -170.99359130859375, "logps_train/policy_1_l": -145.36468505859375, "logps_train/policy_1_w": -128.5065460205078, "logps_train/policy_2_2": -100.19265747070312, "logps_train/policy_2_w": -197.167724609375, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -1.813812494277954, "rewards_train/1-l": -1.4668629169464111, "rewards_train/1-w": 3.0048389434814453, "rewards_train/2-2": 2.5697975158691406, "rewards_train/2-w": -1.1767340898513794, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.4717018604278564, "rewards_train/margins_1": 4.818651437759399, "rewards_train/margins_2": 3.74653160572052, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -226.511962890625, "logps_train/policy_1_l": -206.44119262695312, "logps_train/policy_1_w": -120.93069458007812, "logps_train/policy_2_2": -143.83709716796875, "logps_train/policy_2_w": -215.72525024414062, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -2.592115640640259, "rewards_train/1-l": -3.3866965770721436, "rewards_train/1-w": 3.5351529121398926, "rewards_train/2-2": 2.910674571990967, "rewards_train/2-w": -2.8254051208496094, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.921849489212036, "rewards_train/margins_1": 6.127268552780151, "rewards_train/margins_2": 5.736079692840576, "step": 538 }, { "epoch": 1.61, "logps_train/policy_1_2": -194.90594482421875, "logps_train/policy_1_l": -176.7215576171875, "logps_train/policy_1_w": -87.217529296875, "logps_train/policy_2_2": -122.3371353149414, "logps_train/policy_2_w": -150.64639282226562, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.9991874694824219, "rewards_train/1-l": -2.170823097229004, "rewards_train/1-w": 2.403735637664795, "rewards_train/2-2": 2.6157007217407227, "rewards_train/2-w": -1.0612205266952515, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.574558734893799, "rewards_train/margins_1": 4.402923107147217, "rewards_train/margins_2": 3.676921248435974, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -191.73953247070312, "logps_train/policy_1_l": -144.16937255859375, "logps_train/policy_1_w": -104.89429473876953, "logps_train/policy_2_2": -122.01968383789062, "logps_train/policy_2_w": -165.73452758789062, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.7317657470703125, "rewards_train/1-l": -2.011420488357544, "rewards_train/1-w": 2.6530022621154785, "rewards_train/2-2": 3.18709397315979, "rewards_train/2-w": -1.2765772342681885, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.6644227504730225, "rewards_train/margins_1": 4.384768009185791, "rewards_train/margins_2": 4.4636712074279785, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -210.26766967773438, "logps_train/policy_1_l": -159.521484375, "logps_train/policy_1_w": -140.3015594482422, "logps_train/policy_2_2": -142.87832641601562, "logps_train/policy_2_w": -210.02682495117188, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -1.1447362899780273, "rewards_train/1-l": -2.269653797149658, "rewards_train/1-w": 3.5401570796966553, "rewards_train/2-2": 3.3961527347564697, "rewards_train/2-w": -1.430025339126587, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.8098108768463135, "rewards_train/margins_1": 4.684893369674683, "rewards_train/margins_2": 4.826178073883057, "step": 539 }, { "epoch": 1.61, "logps_train/policy_1_2": -161.6075897216797, "logps_train/policy_1_l": -176.51412963867188, "logps_train/policy_1_w": -95.11167907714844, "logps_train/policy_2_2": -107.87724304199219, "logps_train/policy_2_w": -149.98756408691406, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -0.5341960787773132, "rewards_train/1-l": -2.637740135192871, "rewards_train/1-w": 2.970863103866577, "rewards_train/2-2": 3.1130571365356445, "rewards_train/2-w": -0.5698503255844116, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.608603239059448, "rewards_train/margins_1": 3.5050591826438904, "rewards_train/margins_2": 3.682907462120056, "step": 539 }, { "epoch": 1.62, "learning_rate": 4.97974327088156e-07, "loss": 0.5363, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -154.92691040039062, "logps_train/policy_1_l": -115.25117492675781, "logps_train/policy_1_w": -82.6196517944336, "logps_train/policy_2_2": -96.32485961914062, "logps_train/policy_2_w": -140.6800537109375, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -93.5, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -129.0, "rewards_train/1-2": -1.306702971458435, "rewards_train/1-l": -2.161151885986328, "rewards_train/1-w": 2.543504238128662, "rewards_train/2-2": 2.4142422676086426, "rewards_train/2-w": -1.1844122409820557, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.70465612411499, "rewards_train/margins_1": 3.850207209587097, "rewards_train/margins_2": 3.5986545085906982, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -144.86587524414062, "logps_train/policy_1_l": -167.96072387695312, "logps_train/policy_1_w": -112.51856231689453, "logps_train/policy_2_2": -87.96046447753906, "logps_train/policy_2_w": -167.71913146972656, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.1940091848373413, "rewards_train/1-l": -2.4686317443847656, "rewards_train/1-w": 2.909179210662842, "rewards_train/2-2": 2.805515766143799, "rewards_train/2-w": -0.9272836446762085, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.377810955047607, "rewards_train/margins_1": 4.103188395500183, "rewards_train/margins_2": 3.7327994108200073, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -153.488037109375, "logps_train/policy_1_l": -166.83993530273438, "logps_train/policy_1_w": -144.32791137695312, "logps_train/policy_2_2": -90.08392333984375, "logps_train/policy_2_w": -246.39817810058594, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.7835693359375, "rewards_train/1-l": -2.136583089828491, "rewards_train/1-w": 3.190011739730835, "rewards_train/2-2": 2.5539119243621826, "rewards_train/2-w": -2.8596417903900146, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.326594829559326, "rewards_train/margins_1": 4.973581075668335, "rewards_train/margins_2": 5.413553714752197, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -165.25814819335938, "logps_train/policy_1_l": -133.71856689453125, "logps_train/policy_1_w": -94.03665161132812, "logps_train/policy_2_2": -110.27105712890625, "logps_train/policy_2_w": -152.47793579101562, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -124.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -0.81331467628479, "rewards_train/1-l": -1.2528008222579956, "rewards_train/1-w": 3.0430753231048584, "rewards_train/2-2": 2.9318783283233643, "rewards_train/2-w": -0.3184968829154968, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.295876145362854, "rewards_train/margins_1": 3.8563899993896484, "rewards_train/margins_2": 3.250375211238861, "step": 540 }, { "epoch": 1.62, "logps_train/policy_1_2": -161.84759521484375, "logps_train/policy_1_l": -151.80776977539062, "logps_train/policy_1_w": -113.54058837890625, "logps_train/policy_2_2": -89.5355453491211, "logps_train/policy_2_w": -195.31358337402344, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -2.2933542728424072, "rewards_train/1-l": -1.6699128150939941, "rewards_train/1-w": 3.3959407806396484, "rewards_train/2-2": 2.6034765243530273, "rewards_train/2-w": -2.3352644443511963, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.065853595733643, "rewards_train/margins_1": 5.689295053482056, "rewards_train/margins_2": 4.938740968704224, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -139.9600372314453, "logps_train/policy_1_l": -123.9527816772461, "logps_train/policy_1_w": -111.16502380371094, "logps_train/policy_2_2": -76.3216781616211, "logps_train/policy_2_w": -190.50892639160156, "logps_train/ref_1_2": -120.5, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -99.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -1.9503003358840942, "rewards_train/1-l": -2.046205997467041, "rewards_train/1-w": 3.2604503631591797, "rewards_train/2-2": 2.269101619720459, "rewards_train/2-w": -2.350501775741577, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.306656360626221, "rewards_train/margins_1": 5.210750699043274, "rewards_train/margins_2": 4.619603395462036, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -194.68280029296875, "logps_train/policy_1_l": -148.14788818359375, "logps_train/policy_1_w": -119.12022399902344, "logps_train/policy_2_2": -124.11335754394531, "logps_train/policy_2_w": -182.7064208984375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -124.5, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -1.8796077966690063, "rewards_train/1-l": -2.360393762588501, "rewards_train/1-w": 2.883680820465088, "rewards_train/2-2": 2.9699149131774902, "rewards_train/2-w": -1.540174126625061, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.244074583053589, "rewards_train/margins_1": 4.763288617134094, "rewards_train/margins_2": 4.510089039802551, "step": 541 }, { "epoch": 1.62, "logps_train/policy_1_2": -198.81692504882812, "logps_train/policy_1_l": -169.28292846679688, "logps_train/policy_1_w": -142.66493225097656, "logps_train/policy_2_2": -125.91378784179688, "logps_train/policy_2_w": -242.68707275390625, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -2.403177261352539, "rewards_train/1-l": -2.21144700050354, "rewards_train/1-w": 3.577256917953491, "rewards_train/2-2": 2.74397349357605, "rewards_train/2-w": -3.0577707290649414, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.788703918457031, "rewards_train/margins_1": 5.98043417930603, "rewards_train/margins_2": 5.801744222640991, "step": 541 }, { "epoch": 1.62, "learning_rate": 4.832801518860175e-07, "loss": 0.5742, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -181.13693237304688, "logps_train/policy_1_l": -182.77667236328125, "logps_train/policy_1_w": -110.75189208984375, "logps_train/policy_2_2": -110.90524291992188, "logps_train/policy_2_w": -183.29824829101562, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.6060758829116821, "rewards_train/1-l": -2.015313148498535, "rewards_train/1-w": 2.8502020835876465, "rewards_train/2-2": 3.0714874267578125, "rewards_train/2-w": -1.7934978008270264, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.865515232086182, "rewards_train/margins_1": 4.456277966499329, "rewards_train/margins_2": 4.864985227584839, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -195.46180725097656, "logps_train/policy_1_l": -238.7638397216797, "logps_train/policy_1_w": -160.1896514892578, "logps_train/policy_2_2": -140.77487182617188, "logps_train/policy_2_w": -226.1716766357422, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -0.6526262760162354, "rewards_train/1-l": -2.7513833045959473, "rewards_train/1-w": 3.580643653869629, "rewards_train/2-2": 2.933744192123413, "rewards_train/2-w": -0.7443169355392456, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.332026958465576, "rewards_train/margins_1": 4.233269929885864, "rewards_train/margins_2": 3.6780611276626587, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -157.21128845214844, "logps_train/policy_1_l": -133.3487548828125, "logps_train/policy_1_w": -61.05830001831055, "logps_train/policy_2_2": -93.13148498535156, "logps_train/policy_2_w": -104.77051544189453, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -81.5, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -95.0, "rewards_train/1-2": -1.5820659399032593, "rewards_train/1-l": -2.3594672679901123, "rewards_train/1-w": 2.066063404083252, "rewards_train/2-2": 2.8351430892944336, "rewards_train/2-w": -0.9592775106430054, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.425530672073364, "rewards_train/margins_1": 3.6481293439865112, "rewards_train/margins_2": 3.794420599937439, "step": 542 }, { "epoch": 1.62, "logps_train/policy_1_2": -206.44691467285156, "logps_train/policy_1_l": -175.83660888671875, "logps_train/policy_1_w": -119.40606689453125, "logps_train/policy_2_2": -124.732421875, "logps_train/policy_2_w": -194.67498779296875, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -2.7165656089782715, "rewards_train/1-l": -2.718035936355591, "rewards_train/1-w": 2.7349796295166016, "rewards_train/2-2": 3.0251946449279785, "rewards_train/2-w": -2.361638069152832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.453015565872192, "rewards_train/margins_1": 5.451545238494873, "rewards_train/margins_2": 5.3868327140808105, "step": 542 }, { "epoch": 1.63, "logps_train/policy_1_2": -168.57635498046875, "logps_train/policy_1_l": -198.01809692382812, "logps_train/policy_1_w": -107.57109069824219, "logps_train/policy_2_2": -103.93717956542969, "logps_train/policy_2_w": -180.5462646484375, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.3107614517211914, "rewards_train/1-l": -2.2261509895324707, "rewards_train/1-w": 2.928828239440918, "rewards_train/2-2": 2.4179999828338623, "rewards_train/2-w": -1.4950554370880127, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.154979228973389, "rewards_train/margins_1": 4.239589691162109, "rewards_train/margins_2": 3.913055419921875, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -205.18310546875, "logps_train/policy_1_l": -185.11468505859375, "logps_train/policy_1_w": -163.58328247070312, "logps_train/policy_2_2": -132.1131591796875, "logps_train/policy_2_w": -249.5260467529297, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -199.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -228.0, "rewards_train/1-2": -1.3660881519317627, "rewards_train/1-l": -2.1554131507873535, "rewards_train/1-w": 3.577218770980835, "rewards_train/2-2": 3.569200277328491, "rewards_train/2-w": -2.1170573234558105, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.7326319217681885, "rewards_train/margins_1": 4.943306922912598, "rewards_train/margins_2": 5.686257600784302, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -167.11770629882812, "logps_train/policy_1_l": -161.90280151367188, "logps_train/policy_1_w": -132.681884765625, "logps_train/policy_2_2": -107.71524047851562, "logps_train/policy_2_w": -213.59774780273438, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.2348181009292603, "rewards_train/1-l": -1.9447723627090454, "rewards_train/1-w": 2.9935302734375, "rewards_train/2-2": 2.545468807220459, "rewards_train/2-w": -2.259775161743164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.938302636146545, "rewards_train/margins_1": 4.22834837436676, "rewards_train/margins_2": 4.805243968963623, "step": 543 }, { "epoch": 1.63, "logps_train/policy_1_2": -195.6009521484375, "logps_train/policy_1_l": -162.65335083007812, "logps_train/policy_1_w": -110.05668640136719, "logps_train/policy_2_2": -130.78207397460938, "logps_train/policy_2_w": -161.99549865722656, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -0.7518907189369202, "rewards_train/1-l": -2.691507339477539, "rewards_train/1-w": 3.1138625144958496, "rewards_train/2-2": 3.6128082275390625, "rewards_train/2-w": -0.38626882433891296, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.805369853973389, "rewards_train/margins_1": 3.8657532334327698, "rewards_train/margins_2": 3.9990770518779755, "step": 543 }, { "epoch": 1.63, "learning_rate": 4.6878280498579674e-07, "loss": 0.5517, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -184.72906494140625, "logps_train/policy_1_l": -199.74749755859375, "logps_train/policy_1_w": -142.28704833984375, "logps_train/policy_2_2": -119.64274597167969, "logps_train/policy_2_w": -221.49595642089844, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.4762260913848877, "rewards_train/1-l": -1.9195733070373535, "rewards_train/1-w": 3.4634833335876465, "rewards_train/2-2": 2.6047685146331787, "rewards_train/2-w": -1.7538933753967285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.383056640625, "rewards_train/margins_1": 4.939709424972534, "rewards_train/margins_2": 4.358661890029907, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -194.04165649414062, "logps_train/policy_1_l": -199.5484161376953, "logps_train/policy_1_w": -145.78089904785156, "logps_train/policy_2_2": -125.2609634399414, "logps_train/policy_2_w": -227.28500366210938, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.2424464225769043, "rewards_train/1-l": -2.7282791137695312, "rewards_train/1-w": 3.77972149848938, "rewards_train/2-2": 3.1840600967407227, "rewards_train/2-w": -1.9042831659317017, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.508000612258911, "rewards_train/margins_1": 5.022167921066284, "rewards_train/margins_2": 5.088343262672424, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -194.26805114746094, "logps_train/policy_1_l": -184.54934692382812, "logps_train/policy_1_w": -132.21624755859375, "logps_train/policy_2_2": -116.40969848632812, "logps_train/policy_2_w": -227.58383178710938, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -2.1369619369506836, "rewards_train/1-l": -2.4998555183410645, "rewards_train/1-w": 3.389312982559204, "rewards_train/2-2": 3.178170919418335, "rewards_train/2-w": -2.3904147148132324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.8891685009002686, "rewards_train/margins_1": 5.526274919509888, "rewards_train/margins_2": 5.568585634231567, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -158.23922729492188, "logps_train/policy_1_l": -179.6556854248047, "logps_train/policy_1_w": -117.10045623779297, "logps_train/policy_2_2": -100.45496368408203, "logps_train/policy_2_w": -173.6858367919922, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -123.5, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -1.1614232063293457, "rewards_train/1-l": -2.454436779022217, "rewards_train/1-w": 2.5493295192718506, "rewards_train/2-2": 2.302940845489502, "rewards_train/2-w": -1.019364833831787, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.003766298294067, "rewards_train/margins_1": 3.7107527256011963, "rewards_train/margins_2": 3.322305679321289, "step": 544 }, { "epoch": 1.63, "logps_train/policy_1_2": -164.218505859375, "logps_train/policy_1_l": -144.50531005859375, "logps_train/policy_1_w": -97.95437622070312, "logps_train/policy_2_2": -104.77375030517578, "logps_train/policy_2_w": -164.05380249023438, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -122.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -1.4907965660095215, "rewards_train/1-l": -2.2128353118896484, "rewards_train/1-w": 3.063547134399414, "rewards_train/2-2": 2.569499969482422, "rewards_train/2-w": -1.4143650531768799, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.2763824462890625, "rewards_train/margins_1": 4.5543437004089355, "rewards_train/margins_2": 3.9838650226593018, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -150.90394592285156, "logps_train/policy_1_l": -138.48959350585938, "logps_train/policy_1_w": -118.9586410522461, "logps_train/policy_2_2": -101.98797607421875, "logps_train/policy_2_w": -184.68536376953125, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.182192087173462, "rewards_train/1-l": -1.2980804443359375, "rewards_train/1-w": 2.821713924407959, "rewards_train/2-2": 1.9879215955734253, "rewards_train/2-w": -1.4853328466415405, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.1197943687438965, "rewards_train/margins_1": 4.003906011581421, "rewards_train/margins_2": 3.473254442214966, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -154.3455352783203, "logps_train/policy_1_l": -120.96669006347656, "logps_train/policy_1_w": -104.32630920410156, "logps_train/policy_2_2": -99.23800659179688, "logps_train/policy_2_w": -156.32183837890625, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -127.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.136115312576294, "rewards_train/1-l": -1.988857388496399, "rewards_train/1-w": 2.263169288635254, "rewards_train/2-2": 2.5954370498657227, "rewards_train/2-w": -1.2804245948791504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.252026677131653, "rewards_train/margins_1": 3.399284601211548, "rewards_train/margins_2": 3.875861644744873, "step": 545 }, { "epoch": 1.63, "logps_train/policy_1_2": -212.827392578125, "logps_train/policy_1_l": -180.43777465820312, "logps_train/policy_1_w": -127.20418548583984, "logps_train/policy_2_2": -134.91941833496094, "logps_train/policy_2_w": -205.8203887939453, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.6522693634033203, "rewards_train/1-l": -1.7412877082824707, "rewards_train/1-w": 3.499894618988037, "rewards_train/2-2": 2.9865734577178955, "rewards_train/2-w": -1.8990306854248047, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.241182327270508, "rewards_train/margins_1": 5.152163982391357, "rewards_train/margins_2": 4.8856041431427, "step": 545 }, { "epoch": 1.63, "learning_rate": 4.54483701302994e-07, "loss": 0.5124, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -133.85943603515625, "logps_train/policy_1_l": -146.31353759765625, "logps_train/policy_1_w": -94.56475067138672, "logps_train/policy_2_2": -79.68154907226562, "logps_train/policy_2_w": -162.14112854003906, "logps_train/ref_1_2": -121.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.275496006011963, "rewards_train/1-l": -1.733136773109436, "rewards_train/1-w": 2.5118842124938965, "rewards_train/2-2": 2.2895357608795166, "rewards_train/2-w": -1.912208080291748, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.2450209856033325, "rewards_train/margins_1": 3.7873802185058594, "rewards_train/margins_2": 4.201743841171265, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -207.99176025390625, "logps_train/policy_1_l": -150.88796997070312, "logps_train/policy_1_w": -110.26361083984375, "logps_train/policy_2_2": -131.03982543945312, "logps_train/policy_2_w": -172.32669067382812, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -2.0878472328186035, "rewards_train/1-l": -3.014577865600586, "rewards_train/1-w": 2.704498052597046, "rewards_train/2-2": 3.364034414291382, "rewards_train/2-w": -1.4272006750106812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.719075918197632, "rewards_train/margins_1": 4.792345285415649, "rewards_train/margins_2": 4.791235089302063, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -177.82191467285156, "logps_train/policy_1_l": -137.0979766845703, "logps_train/policy_1_w": -103.81706237792969, "logps_train/policy_2_2": -102.0981216430664, "logps_train/policy_2_w": -188.4320831298828, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -2.745666980743408, "rewards_train/1-l": -2.5926098823547363, "rewards_train/1-w": 2.8505687713623047, "rewards_train/2-2": 2.4813990592956543, "rewards_train/2-w": -2.818598985671997, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.443178653717041, "rewards_train/margins_1": 5.596235752105713, "rewards_train/margins_2": 5.299998044967651, "step": 546 }, { "epoch": 1.63, "logps_train/policy_1_2": -179.6727752685547, "logps_train/policy_1_l": -121.2767333984375, "logps_train/policy_1_w": -93.20535278320312, "logps_train/policy_2_2": -109.45206451416016, "logps_train/policy_2_w": -154.58163452148438, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -100.5, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -2.2290937900543213, "rewards_train/1-l": -2.093029499053955, "rewards_train/1-w": 2.3224081993103027, "rewards_train/2-2": 2.6876060962677, "rewards_train/2-w": -1.7943933010101318, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.415437698364258, "rewards_train/margins_1": 4.551501989364624, "rewards_train/margins_2": 4.481999397277832, "step": 546 }, { "epoch": 1.64, "logps_train/policy_1_2": -234.3634033203125, "logps_train/policy_1_l": -177.27734375, "logps_train/policy_1_w": -105.68374633789062, "logps_train/policy_2_2": -149.11196899414062, "logps_train/policy_2_w": -179.1730194091797, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -2.3144655227661133, "rewards_train/1-l": -2.7025394439697266, "rewards_train/1-w": 2.9534027576446533, "rewards_train/2-2": 3.445833921432495, "rewards_train/2-w": -1.726578950881958, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.65594220161438, "rewards_train/margins_1": 5.267868280410767, "rewards_train/margins_2": 5.172412872314453, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -178.6282196044922, "logps_train/policy_1_l": -197.07456970214844, "logps_train/policy_1_w": -154.970703125, "logps_train/policy_2_2": -114.39570617675781, "logps_train/policy_2_w": -252.07139587402344, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": -1.2065722942352295, "rewards_train/1-l": -3.0211291313171387, "rewards_train/1-w": 3.612987756729126, "rewards_train/2-2": 2.9166789054870605, "rewards_train/2-w": -2.486436605453491, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.634116888046265, "rewards_train/margins_1": 4.8195600509643555, "rewards_train/margins_2": 5.403115510940552, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -201.1735076904297, "logps_train/policy_1_l": -168.4466552734375, "logps_train/policy_1_w": -134.237060546875, "logps_train/policy_2_2": -138.98977661132812, "logps_train/policy_2_w": -198.83834838867188, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.9653981328010559, "rewards_train/1-l": -1.8790408372879028, "rewards_train/1-w": 3.2614495754241943, "rewards_train/2-2": 3.352976083755493, "rewards_train/2-w": -0.8162569999694824, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.140490412712097, "rewards_train/margins_1": 4.22684770822525, "rewards_train/margins_2": 4.169233083724976, "step": 547 }, { "epoch": 1.64, "logps_train/policy_1_2": -192.51744079589844, "logps_train/policy_1_l": -141.2209930419922, "logps_train/policy_1_w": -91.5079345703125, "logps_train/policy_2_2": -125.05164337158203, "logps_train/policy_2_w": -154.11376953125, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -2.265807628631592, "rewards_train/1-l": -1.6928510665893555, "rewards_train/1-w": 2.39398193359375, "rewards_train/2-2": 2.492100954055786, "rewards_train/2-w": -1.7997567653656006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.0868330001831055, "rewards_train/margins_1": 4.659789562225342, "rewards_train/margins_2": 4.291857719421387, "step": 547 }, { "epoch": 1.64, "learning_rate": 4.40384236404921e-07, "loss": 0.5562, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -201.2850799560547, "logps_train/policy_1_l": -209.30960083007812, "logps_train/policy_1_w": -115.7164077758789, "logps_train/policy_2_2": -118.72261047363281, "logps_train/policy_2_w": -194.27096557617188, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -2.609757423400879, "rewards_train/1-l": -3.4632835388183594, "rewards_train/1-w": 2.9648828506469727, "rewards_train/2-2": 2.8328170776367188, "rewards_train/2-w": -2.2330517768859863, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.428166389465332, "rewards_train/margins_1": 5.574640274047852, "rewards_train/margins_2": 5.065868854522705, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -207.00717163085938, "logps_train/policy_1_l": -158.16278076171875, "logps_train/policy_1_w": -118.76780700683594, "logps_train/policy_2_2": -131.0155029296875, "logps_train/policy_2_w": -185.01319885253906, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -2.0929040908813477, "rewards_train/1-l": -1.9359081983566284, "rewards_train/1-w": 2.487868547439575, "rewards_train/2-2": 3.117198944091797, "rewards_train/2-w": -1.6123552322387695, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.423776745796204, "rewards_train/margins_1": 4.580772638320923, "rewards_train/margins_2": 4.729554176330566, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -229.8076171875, "logps_train/policy_1_l": -214.76260375976562, "logps_train/policy_1_w": -154.73980712890625, "logps_train/policy_2_2": -138.73696899414062, "logps_train/policy_2_w": -239.67343139648438, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -188.0, "logps_train/ref_1_w": -185.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -2.7456066608428955, "rewards_train/1-l": -2.705948829650879, "rewards_train/1-w": 3.028752565383911, "rewards_train/2-2": 3.5427093505859375, "rewards_train/2-w": -2.21148419380188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.73470139503479, "rewards_train/margins_1": 5.774359226226807, "rewards_train/margins_2": 5.754193544387817, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -279.3184814453125, "logps_train/policy_1_l": -192.70034790039062, "logps_train/policy_1_w": -146.22091674804688, "logps_train/policy_2_2": -189.26754760742188, "logps_train/policy_2_w": -230.47872924804688, "logps_train/ref_1_2": -260.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -231.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.8615363836288452, "rewards_train/1-l": -1.5168375968933105, "rewards_train/1-w": 3.886918067932129, "rewards_train/2-2": 4.138676166534424, "rewards_train/2-w": -1.6064671277999878, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.4037556648254395, "rewards_train/margins_1": 5.748454451560974, "rewards_train/margins_2": 5.745143294334412, "step": 548 }, { "epoch": 1.64, "logps_train/policy_1_2": -228.3884735107422, "logps_train/policy_1_l": -165.10540771484375, "logps_train/policy_1_w": -99.73683166503906, "logps_train/policy_2_2": -160.17495727539062, "logps_train/policy_2_w": -165.77182006835938, "logps_train/ref_1_2": -216.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.2841606140136719, "rewards_train/1-l": -2.49452543258667, "rewards_train/1-w": 2.961082935333252, "rewards_train/2-2": 3.2496931552886963, "rewards_train/2-w": -1.2974951267242432, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.455608367919922, "rewards_train/margins_1": 4.245243549346924, "rewards_train/margins_2": 4.5471882820129395, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -219.36134338378906, "logps_train/policy_1_l": -204.42755126953125, "logps_train/policy_1_w": -119.713134765625, "logps_train/policy_2_2": -148.4190216064453, "logps_train/policy_2_w": -197.44049072265625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -2.083008050918579, "rewards_train/1-l": -3.0990042686462402, "rewards_train/1-w": 3.12790584564209, "rewards_train/2-2": 2.8151297569274902, "rewards_train/2-w": -1.6026421785354614, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.22691011428833, "rewards_train/margins_1": 5.210913896560669, "rewards_train/margins_2": 4.417771935462952, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -248.83851623535156, "logps_train/policy_1_l": -200.6505126953125, "logps_train/policy_1_w": -142.881103515625, "logps_train/policy_2_2": -157.6309051513672, "logps_train/policy_2_w": -237.953857421875, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -2.066664695739746, "rewards_train/1-l": -2.16939640045166, "rewards_train/1-w": 3.534935474395752, "rewards_train/2-2": 3.9435501098632812, "rewards_train/2-w": -2.351634979248047, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.704331874847412, "rewards_train/margins_1": 5.601600170135498, "rewards_train/margins_2": 6.295185089111328, "step": 549 }, { "epoch": 1.64, "logps_train/policy_1_2": -150.04171752929688, "logps_train/policy_1_l": -194.0108642578125, "logps_train/policy_1_w": -113.55740356445312, "logps_train/policy_2_2": -97.60711669921875, "logps_train/policy_2_w": -168.394775390625, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.8086647987365723, "rewards_train/1-l": -2.6871702671051025, "rewards_train/1-w": 2.5393283367156982, "rewards_train/2-2": 2.8620426654815674, "rewards_train/2-w": -0.8917730450630188, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.226498603820801, "rewards_train/margins_1": 3.3479931354522705, "rewards_train/margins_2": 3.753815710544586, "step": 549 }, { "epoch": 1.65, "learning_rate": 4.264857863744956e-07, "loss": 0.5579, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -227.22142028808594, "logps_train/policy_1_l": -190.7032012939453, "logps_train/policy_1_w": -97.55764770507812, "logps_train/policy_2_2": -160.69732666015625, "logps_train/policy_2_w": -155.00112915039062, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.877610683441162, "rewards_train/1-l": -2.6944901943206787, "rewards_train/1-w": 2.5214080810546875, "rewards_train/2-2": 2.5503835678100586, "rewards_train/2-w": -1.1235384941101074, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.215898275375366, "rewards_train/margins_1": 4.39901876449585, "rewards_train/margins_2": 3.673922061920166, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -208.53952026367188, "logps_train/policy_1_l": -157.69650268554688, "logps_train/policy_1_w": -110.76652526855469, "logps_train/policy_2_2": -126.94720458984375, "logps_train/policy_2_w": -181.05960083007812, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.8812971115112305, "rewards_train/1-l": -2.0893757343292236, "rewards_train/1-w": 2.858893871307373, "rewards_train/2-2": 3.328131675720215, "rewards_train/2-w": -1.5325217247009277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.948269605636597, "rewards_train/margins_1": 4.7401909828186035, "rewards_train/margins_2": 4.860653400421143, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -224.8069305419922, "logps_train/policy_1_l": -228.49932861328125, "logps_train/policy_1_w": -150.62786865234375, "logps_train/policy_2_2": -139.59449768066406, "logps_train/policy_2_w": -244.73800659179688, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -202.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -2.1162405014038086, "rewards_train/1-l": -2.606767177581787, "rewards_train/1-w": 3.3831353187561035, "rewards_train/2-2": 3.344456195831299, "rewards_train/2-w": -2.3282177448272705, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.989902496337891, "rewards_train/margins_1": 5.499375820159912, "rewards_train/margins_2": 5.672673940658569, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -200.1663818359375, "logps_train/policy_1_l": -200.3428497314453, "logps_train/policy_1_w": -137.35702514648438, "logps_train/policy_2_2": -111.54557800292969, "logps_train/policy_2_w": -219.79136657714844, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -2.0322635173797607, "rewards_train/1-l": -2.0131120681762695, "rewards_train/1-w": 3.221329689025879, "rewards_train/2-2": 3.2874343395233154, "rewards_train/2-w": -1.7771837711334229, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.234441757202148, "rewards_train/margins_1": 5.25359320640564, "rewards_train/margins_2": 5.064618110656738, "step": 550 }, { "epoch": 1.65, "logps_train/policy_1_2": -159.47361755371094, "logps_train/policy_1_l": -120.26449584960938, "logps_train/policy_1_w": -117.12435150146484, "logps_train/policy_2_2": -102.12696075439453, "logps_train/policy_2_w": -175.23851013183594, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -164.0, "rewards_train/1-2": -0.9707984924316406, "rewards_train/1-l": -1.4784510135650635, "rewards_train/1-w": 2.857095956802368, "rewards_train/2-2": 2.756639242172241, "rewards_train/2-w": -1.138695240020752, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.335546970367432, "rewards_train/margins_1": 3.827894449234009, "rewards_train/margins_2": 3.895334482192993, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -229.84864807128906, "logps_train/policy_1_l": -175.4419708251953, "logps_train/policy_1_w": -143.553955078125, "logps_train/policy_2_2": -151.00196838378906, "logps_train/policy_2_w": -215.0048370361328, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.874659776687622, "rewards_train/1-l": -2.3940019607543945, "rewards_train/1-w": 3.3829345703125, "rewards_train/2-2": 3.5107407569885254, "rewards_train/2-w": -1.5475550889968872, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.7769365310668945, "rewards_train/margins_1": 5.257594347000122, "rewards_train/margins_2": 5.058295845985413, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -247.98202514648438, "logps_train/policy_1_l": -182.2366943359375, "logps_train/policy_1_w": -142.0428466796875, "logps_train/policy_2_2": -163.790283203125, "logps_train/policy_2_w": -213.7705535888672, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.6384360790252686, "rewards_train/1-l": -1.9471807479858398, "rewards_train/1-w": 3.3813600540161133, "rewards_train/2-2": 4.028297424316406, "rewards_train/2-w": -0.9245160818099976, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.328540802001953, "rewards_train/margins_1": 5.019796133041382, "rewards_train/margins_2": 4.952813506126404, "step": 551 }, { "epoch": 1.65, "logps_train/policy_1_2": -198.01669311523438, "logps_train/policy_1_l": -212.89132690429688, "logps_train/policy_1_w": -140.3625030517578, "logps_train/policy_2_2": -132.56643676757812, "logps_train/policy_2_w": -206.83566284179688, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -196.0, "rewards_train/1-2": -0.84151291847229, "rewards_train/1-l": -2.900507926940918, "rewards_train/1-w": 3.2633585929870605, "rewards_train/2-2": 3.269138813018799, "rewards_train/2-w": -1.0749720335006714, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.1638665199279785, "rewards_train/margins_1": 4.104871511459351, "rewards_train/margins_2": 4.34411084651947, "step": 551 }, { "epoch": 1.65, "learning_rate": 4.127897076759399e-07, "loss": 0.4943, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -137.2360076904297, "logps_train/policy_1_l": -93.35662841796875, "logps_train/policy_1_w": -85.85661315917969, "logps_train/policy_2_2": -86.88325500488281, "logps_train/policy_2_w": -144.05642700195312, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -109.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -1.0862958431243896, "rewards_train/1-l": -1.0248225927352905, "rewards_train/1-w": 2.4558181762695312, "rewards_train/2-2": 2.2089405059814453, "rewards_train/2-w": -1.1624780893325806, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.4806407690048218, "rewards_train/margins_1": 3.542114019393921, "rewards_train/margins_2": 3.371418595314026, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -174.8939208984375, "logps_train/policy_1_l": -121.12744903564453, "logps_train/policy_1_w": -74.30287170410156, "logps_train/policy_2_2": -110.8971939086914, "logps_train/policy_2_w": -130.30410766601562, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -115.5, "rewards_train/1-2": -2.032362461090088, "rewards_train/1-l": -1.75639808177948, "rewards_train/1-w": 2.1075055599212646, "rewards_train/2-2": 2.1519806385040283, "rewards_train/2-w": -1.458633542060852, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.8639036417007446, "rewards_train/margins_1": 4.1398680210113525, "rewards_train/margins_2": 3.6106141805648804, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -199.86062622070312, "logps_train/policy_1_l": -143.83033752441406, "logps_train/policy_1_w": -96.58003234863281, "logps_train/policy_2_2": -129.7342071533203, "logps_train/policy_2_w": -159.4052276611328, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -126.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.9861600399017334, "rewards_train/1-l": -1.7481698989868164, "rewards_train/1-w": 2.722856283187866, "rewards_train/2-2": 2.725407600402832, "rewards_train/2-w": -1.378218412399292, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.471026182174683, "rewards_train/margins_1": 4.7090163230896, "rewards_train/margins_2": 4.103626012802124, "step": 552 }, { "epoch": 1.65, "logps_train/policy_1_2": -300.5890808105469, "logps_train/policy_1_l": -249.68748474121094, "logps_train/policy_1_w": -154.87210083007812, "logps_train/policy_2_2": -185.38302612304688, "logps_train/policy_2_w": -249.29934692382812, "logps_train/ref_1_2": -264.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -227.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -3.5417211055755615, "rewards_train/1-l": -2.7175769805908203, "rewards_train/1-w": 3.154195547103882, "rewards_train/2-2": 4.199196815490723, "rewards_train/2-w": -3.0330581665039062, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.871772527694702, "rewards_train/margins_1": 6.695916652679443, "rewards_train/margins_2": 7.232254981994629, "step": 552 }, { "epoch": 1.66, "logps_train/policy_1_2": -171.8941650390625, "logps_train/policy_1_l": -198.00234985351562, "logps_train/policy_1_w": -154.38534545898438, "logps_train/policy_2_2": -117.72075653076172, "logps_train/policy_2_w": -234.25111389160156, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.6542598605155945, "rewards_train/1-l": -2.3784565925598145, "rewards_train/1-w": 3.8421285152435303, "rewards_train/2-2": 2.862689733505249, "rewards_train/2-w": -1.2803840637207031, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.220585107803345, "rewards_train/margins_1": 4.496388375759125, "rewards_train/margins_2": 4.143073797225952, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -177.26126098632812, "logps_train/policy_1_l": -167.8157958984375, "logps_train/policy_1_w": -118.85707092285156, "logps_train/policy_2_2": -112.3824462890625, "logps_train/policy_2_w": -191.54446411132812, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -2.2890162467956543, "rewards_train/1-l": -2.208824634552002, "rewards_train/1-w": 3.028843402862549, "rewards_train/2-2": 2.321521282196045, "rewards_train/2-w": -1.9803264141082764, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.237668037414551, "rewards_train/margins_1": 5.317859649658203, "rewards_train/margins_2": 4.301847696304321, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -232.29872131347656, "logps_train/policy_1_l": -204.2008514404297, "logps_train/policy_1_w": -155.85528564453125, "logps_train/policy_2_2": -147.6246337890625, "logps_train/policy_2_w": -240.24758911132812, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -1.989638090133667, "rewards_train/1-l": -2.7321951389312744, "rewards_train/1-w": 3.3968937397003174, "rewards_train/2-2": 3.619568347930908, "rewards_train/2-w": -1.8540539741516113, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.129088878631592, "rewards_train/margins_1": 5.386531829833984, "rewards_train/margins_2": 5.4736223220825195, "step": 553 }, { "epoch": 1.66, "logps_train/policy_1_2": -172.51019287109375, "logps_train/policy_1_l": -121.35405731201172, "logps_train/policy_1_w": -65.19623565673828, "logps_train/policy_2_2": -97.51697540283203, "logps_train/policy_2_w": -119.61734771728516, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -101.0, "logps_train/ref_1_w": -84.5, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": -2.6662421226501465, "rewards_train/1-l": -2.0367612838745117, "rewards_train/1-w": 1.920611023902893, "rewards_train/2-2": 2.4615836143493652, "rewards_train/2-w": -1.5530434846878052, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.957372307777405, "rewards_train/margins_1": 4.5868531465530396, "rewards_train/margins_2": 4.01462709903717, "step": 553 }, { "epoch": 1.66, "learning_rate": 3.992973370223896e-07, "loss": 0.572, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -160.89773559570312, "logps_train/policy_1_l": -160.8717041015625, "logps_train/policy_1_w": -109.72895050048828, "logps_train/policy_2_2": -95.23677062988281, "logps_train/policy_2_w": -176.31781005859375, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -2.2395787239074707, "rewards_train/1-l": -1.936487078666687, "rewards_train/1-w": 2.40562105178833, "rewards_train/2-2": 2.1944875717163086, "rewards_train/2-w": -1.9968197345733643, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.342108130455017, "rewards_train/margins_1": 4.645199775695801, "rewards_train/margins_2": 4.191307306289673, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -211.94247436523438, "logps_train/policy_1_l": -198.6042022705078, "logps_train/policy_1_w": -130.13194274902344, "logps_train/policy_2_2": -140.2060546875, "logps_train/policy_2_w": -203.7454833984375, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.7411227226257324, "rewards_train/1-l": -2.6615428924560547, "rewards_train/1-w": 3.539149761199951, "rewards_train/2-2": 3.0614254474639893, "rewards_train/2-w": -1.525719404220581, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.200692653656006, "rewards_train/margins_1": 5.280272483825684, "rewards_train/margins_2": 4.58714485168457, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -274.43902587890625, "logps_train/policy_1_l": -180.51239013671875, "logps_train/policy_1_w": -146.84423828125, "logps_train/policy_2_2": -178.02294921875, "logps_train/policy_2_w": -238.959716796875, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -2.1989831924438477, "rewards_train/1-l": -2.0792670249938965, "rewards_train/1-w": 3.618896961212158, "rewards_train/2-2": 4.13871955871582, "rewards_train/2-w": -2.154956340789795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.698163986206055, "rewards_train/margins_1": 5.817880153656006, "rewards_train/margins_2": 6.293675899505615, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -218.0864715576172, "logps_train/policy_1_l": -167.3477020263672, "logps_train/policy_1_w": -112.07907104492188, "logps_train/policy_2_2": -142.44354248046875, "logps_train/policy_2_w": -173.56468200683594, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.6883342266082764, "rewards_train/1-l": -2.1638712882995605, "rewards_train/1-w": 2.9126009941101074, "rewards_train/2-2": 3.4189276695251465, "rewards_train/2-w": -1.1256083250045776, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.076472282409668, "rewards_train/margins_1": 4.600935220718384, "rewards_train/margins_2": 4.544535994529724, "step": 554 }, { "epoch": 1.66, "logps_train/policy_1_2": -188.28909301757812, "logps_train/policy_1_l": -162.34376525878906, "logps_train/policy_1_w": -104.11591339111328, "logps_train/policy_2_2": -121.76109313964844, "logps_train/policy_2_w": -182.58401489257812, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.5187525749206543, "rewards_train/1-l": -2.085548162460327, "rewards_train/1-w": 3.006377935409546, "rewards_train/2-2": 2.804358959197998, "rewards_train/2-w": -1.7474644184112549, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.091926097869873, "rewards_train/margins_1": 4.5251305103302, "rewards_train/margins_2": 4.551823377609253, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -203.923828125, "logps_train/policy_1_l": -246.73171997070312, "logps_train/policy_1_w": -167.17340087890625, "logps_train/policy_2_2": -144.66665649414062, "logps_train/policy_2_w": -237.20115661621094, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -213.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.8198486566543579, "rewards_train/1-l": -3.4034457206726074, "rewards_train/1-w": 3.3765077590942383, "rewards_train/2-2": 3.3483734130859375, "rewards_train/2-w": -1.4615213871002197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.779953479766846, "rewards_train/margins_1": 4.196356415748596, "rewards_train/margins_2": 4.809894800186157, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -183.0906982421875, "logps_train/policy_1_l": -123.83195495605469, "logps_train/policy_1_w": -104.86030578613281, "logps_train/policy_2_2": -114.35513305664062, "logps_train/policy_2_w": -183.70236206054688, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.7573111057281494, "rewards_train/1-l": -1.6775319576263428, "rewards_train/1-w": 2.8461954593658447, "rewards_train/2-2": 2.9532079696655273, "rewards_train/2-w": -2.2829315662384033, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.5237274169921875, "rewards_train/margins_1": 4.603506565093994, "rewards_train/margins_2": 5.236139535903931, "step": 555 }, { "epoch": 1.66, "logps_train/policy_1_2": -160.52255249023438, "logps_train/policy_1_l": -109.25540161132812, "logps_train/policy_1_w": -114.99124908447266, "logps_train/policy_2_2": -101.68973541259766, "logps_train/policy_2_w": -181.53298950195312, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.5477626323699951, "rewards_train/1-l": -1.6089380979537964, "rewards_train/1-w": 3.500093460083008, "rewards_train/2-2": 2.445284366607666, "rewards_train/2-w": -1.5421652793884277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.109031558036804, "rewards_train/margins_1": 5.047856092453003, "rewards_train/margins_2": 3.9874496459960938, "step": 555 }, { "epoch": 1.66, "learning_rate": 3.860099912454346e-07, "loss": 0.5384, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -232.5150909423828, "logps_train/policy_1_l": -194.4405517578125, "logps_train/policy_1_w": -117.3841552734375, "logps_train/policy_2_2": -145.15411376953125, "logps_train/policy_2_w": -203.23660278320312, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.7796339988708496, "rewards_train/1-l": -2.2079226970672607, "rewards_train/1-w": 3.168224573135376, "rewards_train/2-2": 3.4697442054748535, "rewards_train/2-w": -2.3658485412597656, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.376147270202637, "rewards_train/margins_1": 4.947858572006226, "rewards_train/margins_2": 5.835592746734619, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -199.28057861328125, "logps_train/policy_1_l": -217.81088256835938, "logps_train/policy_1_w": -117.85519409179688, "logps_train/policy_2_2": -119.30734252929688, "logps_train/policy_2_w": -209.65301513671875, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.8604786396026611, "rewards_train/1-l": -3.146712303161621, "rewards_train/1-w": 3.3484652042388916, "rewards_train/2-2": 3.3020787239074707, "rewards_train/2-w": -2.316082000732422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.495177507400513, "rewards_train/margins_1": 5.208943843841553, "rewards_train/margins_2": 5.618160724639893, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -203.31289672851562, "logps_train/policy_1_l": -199.1644744873047, "logps_train/policy_1_w": -158.62594604492188, "logps_train/policy_2_2": -130.23263549804688, "logps_train/policy_2_w": -242.56405639648438, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -1.4851959943771362, "rewards_train/1-l": -2.7088301181793213, "rewards_train/1-w": 3.2545928955078125, "rewards_train/2-2": 3.29783034324646, "rewards_train/2-w": -2.3478121757507324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.963423013687134, "rewards_train/margins_1": 4.739788889884949, "rewards_train/margins_2": 5.645642518997192, "step": 556 }, { "epoch": 1.66, "logps_train/policy_1_2": -181.784912109375, "logps_train/policy_1_l": -177.97943115234375, "logps_train/policy_1_w": -115.03876495361328, "logps_train/policy_2_2": -102.08470153808594, "logps_train/policy_2_w": -196.99472045898438, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -2.4872794151306152, "rewards_train/1-l": -2.446185350418091, "rewards_train/1-w": 2.7353811264038086, "rewards_train/2-2": 2.537818431854248, "rewards_train/2-w": -2.2834556102752686, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.181566476821899, "rewards_train/margins_1": 5.222660541534424, "rewards_train/margins_2": 4.821274042129517, "step": 556 }, { "epoch": 1.67, "logps_train/policy_1_2": -176.72073364257812, "logps_train/policy_1_l": -125.33096313476562, "logps_train/policy_1_w": -116.04608154296875, "logps_train/policy_2_2": -102.22895812988281, "logps_train/policy_2_w": -193.20989990234375, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.8054718971252441, "rewards_train/1-l": -1.741788387298584, "rewards_train/1-w": 3.2592594623565674, "rewards_train/2-2": 2.759331226348877, "rewards_train/2-w": -1.720207691192627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.001047849655151, "rewards_train/margins_1": 5.0647313594818115, "rewards_train/margins_2": 4.479538917541504, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -193.77430725097656, "logps_train/policy_1_l": -167.2379150390625, "logps_train/policy_1_w": -114.16221618652344, "logps_train/policy_2_2": -117.76384735107422, "logps_train/policy_2_w": -173.09030151367188, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -2.0496957302093506, "rewards_train/1-l": -2.3593392372131348, "rewards_train/1-w": 2.622255563735962, "rewards_train/2-2": 3.037092685699463, "rewards_train/2-w": -1.2988755702972412, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.981594800949097, "rewards_train/margins_1": 4.6719512939453125, "rewards_train/margins_2": 4.335968255996704, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -158.774169921875, "logps_train/policy_1_l": -164.45635986328125, "logps_train/policy_1_w": -110.60874938964844, "logps_train/policy_2_2": -97.55801391601562, "logps_train/policy_2_w": -170.80413818359375, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -124.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.2385025024414062, "rewards_train/1-l": -2.3695974349975586, "rewards_train/1-w": 2.7645161151885986, "rewards_train/2-2": 2.6343603134155273, "rewards_train/2-w": -1.1382265090942383, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.134113550186157, "rewards_train/margins_1": 4.003018617630005, "rewards_train/margins_2": 3.7725868225097656, "step": 557 }, { "epoch": 1.67, "logps_train/policy_1_2": -212.0435791015625, "logps_train/policy_1_l": -170.0394287109375, "logps_train/policy_1_w": -129.10504150390625, "logps_train/policy_2_2": -137.8836669921875, "logps_train/policy_2_w": -204.61190795898438, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.8871698379516602, "rewards_train/1-l": -2.2495474815368652, "rewards_train/1-w": 3.5598082542419434, "rewards_train/2-2": 3.150305986404419, "rewards_train/2-w": -1.4410737752914429, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.809355735778809, "rewards_train/margins_1": 5.4469780921936035, "rewards_train/margins_2": 4.591379761695862, "step": 557 }, { "epoch": 1.67, "learning_rate": 3.729289671665998e-07, "loss": 0.4833, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -158.48699951171875, "logps_train/policy_1_l": -150.4408721923828, "logps_train/policy_1_w": -91.86170196533203, "logps_train/policy_2_2": -92.99717712402344, "logps_train/policy_2_w": -151.28851318359375, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -125.5, "logps_train/ref_1_w": -116.0, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -1.9054137468338013, "rewards_train/1-l": -2.4999475479125977, "rewards_train/1-w": 2.447154998779297, "rewards_train/2-2": 2.5353660583496094, "rewards_train/2-w": -1.5312676429748535, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.9471025466918945, "rewards_train/margins_1": 4.352568745613098, "rewards_train/margins_2": 4.066633701324463, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -157.32217407226562, "logps_train/policy_1_l": -126.82697296142578, "logps_train/policy_1_w": -74.37174987792969, "logps_train/policy_2_2": -97.0128173828125, "logps_train/policy_2_w": -134.45608520507812, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -109.5, "logps_train/ref_1_w": -96.5, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -121.5, "rewards_train/1-2": -1.5384669303894043, "rewards_train/1-l": -1.7038400173187256, "rewards_train/1-w": 2.2164385318756104, "rewards_train/2-2": 2.760437488555908, "rewards_train/2-w": -1.3168976306915283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.920278549194336, "rewards_train/margins_1": 3.7549054622650146, "rewards_train/margins_2": 4.0773351192474365, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -184.58131408691406, "logps_train/policy_1_l": -141.58828735351562, "logps_train/policy_1_w": -94.17176818847656, "logps_train/policy_2_2": -115.52825164794922, "logps_train/policy_2_w": -154.81228637695312, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.8600845336914062, "rewards_train/1-l": -2.559415817260742, "rewards_train/1-w": 2.4974710941314697, "rewards_train/2-2": 2.9208078384399414, "rewards_train/2-w": -1.6695098876953125, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.056886911392212, "rewards_train/margins_1": 4.357555627822876, "rewards_train/margins_2": 4.590317726135254, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -180.81072998046875, "logps_train/policy_1_l": -179.78155517578125, "logps_train/policy_1_w": -126.90863037109375, "logps_train/policy_2_2": -118.12239074707031, "logps_train/policy_2_w": -210.22586059570312, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.1594911813735962, "rewards_train/1-l": -2.2340152263641357, "rewards_train/1-w": 3.658355712890625, "rewards_train/2-2": 2.6877617835998535, "rewards_train/2-w": -1.6616485118865967, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.892370939254761, "rewards_train/margins_1": 4.817846894264221, "rewards_train/margins_2": 4.34941029548645, "step": 558 }, { "epoch": 1.67, "logps_train/policy_1_2": -123.48869323730469, "logps_train/policy_1_l": -124.23103332519531, "logps_train/policy_1_w": -83.1024169921875, "logps_train/policy_2_2": -75.82405853271484, "logps_train/policy_2_w": -153.33596801757812, "logps_train/ref_1_2": -111.5, "logps_train/ref_1_l": -103.5, "logps_train/ref_1_w": -109.0, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -1.2293379306793213, "rewards_train/1-l": -2.077692985534668, "rewards_train/1-w": 2.5452277660369873, "rewards_train/2-2": 1.9066567420959473, "rewards_train/2-w": -2.2050812244415283, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.622920751571655, "rewards_train/margins_1": 3.7745656967163086, "rewards_train/margins_2": 4.111737966537476, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -224.31468200683594, "logps_train/policy_1_l": -174.77880859375, "logps_train/policy_1_w": -95.44151306152344, "logps_train/policy_2_2": -140.9034423828125, "logps_train/policy_2_w": -166.16796875, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -2.5572500228881836, "rewards_train/1-l": -2.2449698448181152, "rewards_train/1-w": 2.665125608444214, "rewards_train/2-2": 3.178406238555908, "rewards_train/2-w": -1.8779305219650269, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.910095453262329, "rewards_train/margins_1": 5.2223756313323975, "rewards_train/margins_2": 5.056336760520935, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -235.09735107421875, "logps_train/policy_1_l": -204.9808349609375, "logps_train/policy_1_w": -150.61297607421875, "logps_train/policy_2_2": -160.41510009765625, "logps_train/policy_2_w": -215.87808227539062, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.0867871046066284, "rewards_train/1-l": -2.350470781326294, "rewards_train/1-w": 3.1202938556671143, "rewards_train/2-2": 3.9711859226226807, "rewards_train/2-w": -0.983121395111084, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.470764636993408, "rewards_train/margins_1": 4.207080960273743, "rewards_train/margins_2": 4.954307317733765, "step": 559 }, { "epoch": 1.67, "logps_train/policy_1_2": -251.7263946533203, "logps_train/policy_1_l": -148.65475463867188, "logps_train/policy_1_w": -98.71195983886719, "logps_train/policy_2_2": -153.5609130859375, "logps_train/policy_2_w": -162.64053344726562, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -2.710921287536621, "rewards_train/1-l": -1.8274877071380615, "rewards_train/1-w": 2.908979892730713, "rewards_train/2-2": 3.9782826900482178, "rewards_train/2-w": -1.2050687074661255, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.736467599868774, "rewards_train/margins_1": 5.619901180267334, "rewards_train/margins_2": 5.183351397514343, "step": 559 }, { "epoch": 1.68, "learning_rate": 3.6005554147077406e-07, "loss": 0.5525, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -166.70143127441406, "logps_train/policy_1_l": -150.34005737304688, "logps_train/policy_1_w": -111.3397445678711, "logps_train/policy_2_2": -109.09736633300781, "logps_train/policy_2_w": -174.78729248046875, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -1.226002812385559, "rewards_train/1-l": -1.7148663997650146, "rewards_train/1-w": 2.785946846008301, "rewards_train/2-2": 2.474442481994629, "rewards_train/2-w": -1.5295101404190063, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.500813245773315, "rewards_train/margins_1": 4.01194965839386, "rewards_train/margins_2": 4.003952622413635, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -145.90309143066406, "logps_train/policy_1_l": -89.16592407226562, "logps_train/policy_1_w": -83.18644714355469, "logps_train/policy_2_2": -88.94703674316406, "logps_train/policy_2_w": -144.72702026367188, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -76.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -113.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -1.7106211185455322, "rewards_train/1-l": -1.262296199798584, "rewards_train/1-w": 3.0104570388793945, "rewards_train/2-2": 2.4400620460510254, "rewards_train/2-w": -1.3192832469940186, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.2727532386779785, "rewards_train/margins_1": 4.721078157424927, "rewards_train/margins_2": 3.759345293045044, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -209.7821807861328, "logps_train/policy_1_l": -160.42156982421875, "logps_train/policy_1_w": -90.50914001464844, "logps_train/policy_2_2": -136.98886108398438, "logps_train/policy_2_w": -159.7106475830078, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -2.2356395721435547, "rewards_train/1-l": -2.2628605365753174, "rewards_train/1-w": 3.308851718902588, "rewards_train/2-2": 2.860001564025879, "rewards_train/2-w": -1.321845531463623, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.571712255477905, "rewards_train/margins_1": 5.544491291046143, "rewards_train/margins_2": 4.181847095489502, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -175.0540313720703, "logps_train/policy_1_l": -183.65029907226562, "logps_train/policy_1_w": -101.67720794677734, "logps_train/policy_2_2": -119.83590698242188, "logps_train/policy_2_w": -163.42478942871094, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -0.7089188098907471, "rewards_train/1-l": -2.8451075553894043, "rewards_train/1-w": 3.87446665763855, "rewards_train/2-2": 3.0656282901763916, "rewards_train/2-w": -0.4760729670524597, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.719574213027954, "rewards_train/margins_1": 4.583385467529297, "rewards_train/margins_2": 3.5417012572288513, "step": 560 }, { "epoch": 1.68, "logps_train/policy_1_2": -155.86444091796875, "logps_train/policy_1_l": -134.7664337158203, "logps_train/policy_1_w": -96.92939758300781, "logps_train/policy_2_2": -112.88743591308594, "logps_train/policy_2_w": -132.2539520263672, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -118.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -135.0, "rewards_train/1-2": -0.09347429871559143, "rewards_train/1-l": -1.6752758026123047, "rewards_train/1-w": 2.6831345558166504, "rewards_train/2-2": 3.1675057411193848, "rewards_train/2-w": 0.28710561990737915, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.358410358428955, "rewards_train/margins_1": 2.776608854532242, "rewards_train/margins_2": 2.8804001212120056, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -94.61027526855469, "logps_train/policy_1_l": -104.99433898925781, "logps_train/policy_1_w": -55.25068664550781, "logps_train/policy_2_2": -70.56771850585938, "logps_train/policy_2_w": -80.98431396484375, "logps_train/ref_1_2": -90.5, "logps_train/ref_1_l": -82.0, "logps_train/ref_1_w": -73.5, "logps_train/ref_2_2": -83.0, "logps_train/ref_2_w": -80.5, "rewards_train/1-2": -0.42645734548568726, "rewards_train/1-l": -2.277754306793213, "rewards_train/1-w": 1.8245407342910767, "rewards_train/2-2": 1.2598296403884888, "rewards_train/2-w": -0.06718125939369202, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.1022950410842896, "rewards_train/margins_1": 2.250998079776764, "rewards_train/margins_2": 1.3270108997821808, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -204.867919921875, "logps_train/policy_1_l": -191.83053588867188, "logps_train/policy_1_w": -122.11211395263672, "logps_train/policy_2_2": -115.74525451660156, "logps_train/policy_2_w": -201.8380889892578, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -2.587574005126953, "rewards_train/1-l": -2.298482894897461, "rewards_train/1-w": 3.074530601501465, "rewards_train/2-2": 3.0590686798095703, "rewards_train/2-w": -2.1728711128234863, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.373013496398926, "rewards_train/margins_1": 5.662104606628418, "rewards_train/margins_2": 5.231939792633057, "step": 561 }, { "epoch": 1.68, "logps_train/policy_1_2": -202.82887268066406, "logps_train/policy_1_l": -169.96592712402344, "logps_train/policy_1_w": -115.52757263183594, "logps_train/policy_2_2": -135.71148681640625, "logps_train/policy_2_w": -191.75401306152344, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.7889416217803955, "rewards_train/1-l": -2.377647638320923, "rewards_train/1-w": 3.3230233192443848, "rewards_train/2-2": 3.0055112838745117, "rewards_train/2-w": -1.893370509147644, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.700670957565308, "rewards_train/margins_1": 5.11196494102478, "rewards_train/margins_2": 4.898881793022156, "step": 561 }, { "epoch": 1.68, "learning_rate": 3.4739097058161116e-07, "loss": 0.7294, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -189.1785430908203, "logps_train/policy_1_l": -148.33401489257812, "logps_train/policy_1_w": -123.7108154296875, "logps_train/policy_2_2": -126.66779327392578, "logps_train/policy_2_w": -198.19692993164062, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -130.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -1.4129222631454468, "rewards_train/1-l": -1.9050318002700806, "rewards_train/1-w": 3.37423038482666, "rewards_train/2-2": 2.7414727210998535, "rewards_train/2-w": -1.3337550163269043, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.279262185096741, "rewards_train/margins_1": 4.787152647972107, "rewards_train/margins_2": 4.075227737426758, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -237.34201049804688, "logps_train/policy_1_l": -211.4158935546875, "logps_train/policy_1_w": -169.20236206054688, "logps_train/policy_2_2": -158.6697998046875, "logps_train/policy_2_w": -251.25233459472656, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -207.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -237.0, "rewards_train/1-2": -1.6408416032791138, "rewards_train/1-l": -2.762683868408203, "rewards_train/1-w": 3.753983974456787, "rewards_train/2-2": 3.574036121368408, "rewards_train/2-w": -1.4252328872680664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.51666784286499, "rewards_train/margins_1": 5.394825577735901, "rewards_train/margins_2": 4.999269008636475, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -187.27908325195312, "logps_train/policy_1_l": -159.04139709472656, "logps_train/policy_1_w": -115.7405776977539, "logps_train/policy_2_2": -117.82467651367188, "logps_train/policy_2_w": -195.4509735107422, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.8197057247161865, "rewards_train/1-l": -2.376795768737793, "rewards_train/1-w": 3.020571231842041, "rewards_train/2-2": 3.0079617500305176, "rewards_train/2-w": -2.350761651992798, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.397367000579834, "rewards_train/margins_1": 4.8402769565582275, "rewards_train/margins_2": 5.358723402023315, "step": 562 }, { "epoch": 1.68, "logps_train/policy_1_2": -226.71592712402344, "logps_train/policy_1_l": -189.52630615234375, "logps_train/policy_1_w": -118.69631958007812, "logps_train/policy_2_2": -158.10150146484375, "logps_train/policy_2_w": -177.6634521484375, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.5286239385604858, "rewards_train/1-l": -2.3217711448669434, "rewards_train/1-w": 3.0038061141967773, "rewards_train/2-2": 3.228912353515625, "rewards_train/2-w": -0.7163444757461548, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.325577259063721, "rewards_train/margins_1": 4.532430052757263, "rewards_train/margins_2": 3.94525682926178, "step": 562 }, { "epoch": 1.69, "logps_train/policy_1_2": -173.5895233154297, "logps_train/policy_1_l": -159.2663116455078, "logps_train/policy_1_w": -105.45143127441406, "logps_train/policy_2_2": -111.9248046875, "logps_train/policy_2_w": -177.02816772460938, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.587273359298706, "rewards_train/1-l": -2.5322458744049072, "rewards_train/1-w": 2.6630589962005615, "rewards_train/2-2": 2.808104991912842, "rewards_train/2-w": -2.0243024826049805, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.195304870605469, "rewards_train/margins_1": 4.250332355499268, "rewards_train/margins_2": 4.832407474517822, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -230.89588928222656, "logps_train/policy_1_l": -261.8807373046875, "logps_train/policy_1_w": -130.8634490966797, "logps_train/policy_2_2": -147.36669921875, "logps_train/policy_2_w": -205.87451171875, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -231.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -2.1565823554992676, "rewards_train/1-l": -3.097543716430664, "rewards_train/1-w": 3.099202871322632, "rewards_train/2-2": 3.349071502685547, "rewards_train/2-w": -1.7007333040237427, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.196746587753296, "rewards_train/margins_1": 5.255785226821899, "rewards_train/margins_2": 5.0498048067092896, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -170.42897033691406, "logps_train/policy_1_l": -134.05606079101562, "logps_train/policy_1_w": -92.00843811035156, "logps_train/policy_2_2": -110.50044250488281, "logps_train/policy_2_w": -153.0379638671875, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -110.5, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -1.5261003971099854, "rewards_train/1-l": -2.3337318897247314, "rewards_train/1-w": 2.8511576652526855, "rewards_train/2-2": 2.754643440246582, "rewards_train/2-w": -1.1055538654327393, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.184889554977417, "rewards_train/margins_1": 4.377258062362671, "rewards_train/margins_2": 3.8601973056793213, "step": 563 }, { "epoch": 1.69, "logps_train/policy_1_2": -140.29473876953125, "logps_train/policy_1_l": -146.83859252929688, "logps_train/policy_1_w": -104.97970581054688, "logps_train/policy_2_2": -94.92266082763672, "logps_train/policy_2_w": -158.96481323242188, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -126.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -122.5, "logps_train/ref_2_w": -154.0, "rewards_train/1-2": -0.2888491153717041, "rewards_train/1-l": -2.0876684188842773, "rewards_train/1-w": 2.992654800415039, "rewards_train/2-2": 2.7557806968688965, "rewards_train/2-w": -0.4738248288631439, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.080323219299316, "rewards_train/margins_1": 3.281503915786743, "rewards_train/margins_2": 3.2296055257320404, "step": 563 }, { "epoch": 1.69, "learning_rate": 3.3493649053890325e-07, "loss": 0.4933, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -213.64309692382812, "logps_train/policy_1_l": -163.60401916503906, "logps_train/policy_1_w": -111.80646514892578, "logps_train/policy_2_2": -134.92453002929688, "logps_train/policy_2_w": -193.0762481689453, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -2.361966371536255, "rewards_train/1-l": -2.01792049407959, "rewards_train/1-w": 3.508415937423706, "rewards_train/2-2": 3.0059854984283447, "rewards_train/2-w": -1.3232500553131104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.526336431503296, "rewards_train/margins_1": 5.870382308959961, "rewards_train/margins_2": 4.329235553741455, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -205.57212829589844, "logps_train/policy_1_l": -202.53335571289062, "logps_train/policy_1_w": -143.01589965820312, "logps_train/policy_2_2": -133.49049377441406, "logps_train/policy_2_w": -221.86318969726562, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.862291693687439, "rewards_train/1-l": -3.393765687942505, "rewards_train/1-w": 3.4142303466796875, "rewards_train/2-2": 3.100170135498047, "rewards_train/2-w": -1.7593662738800049, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.807996034622192, "rewards_train/margins_1": 5.2765220403671265, "rewards_train/margins_2": 4.859536409378052, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -266.8505554199219, "logps_train/policy_1_l": -241.92047119140625, "logps_train/policy_1_w": -219.44879150390625, "logps_train/policy_2_2": -171.92904663085938, "logps_train/policy_2_w": -319.58984375, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -264.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -302.0, "rewards_train/1-2": -2.0225579738616943, "rewards_train/1-l": -1.9304264783859253, "rewards_train/1-w": 4.405120849609375, "rewards_train/2-2": 3.7649083137512207, "rewards_train/2-w": -1.7046886682510376, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.3355473279953, "rewards_train/margins_1": 6.427678823471069, "rewards_train/margins_2": 5.469596982002258, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -172.8568115234375, "logps_train/policy_1_l": -186.60067749023438, "logps_train/policy_1_w": -122.13469696044922, "logps_train/policy_2_2": -118.2081527709961, "logps_train/policy_2_w": -192.05404663085938, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -0.7358770370483398, "rewards_train/1-l": -2.591707229614258, "rewards_train/1-w": 3.1919987201690674, "rewards_train/2-2": 2.9660017490386963, "rewards_train/2-w": -1.3022785186767578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.783705949783325, "rewards_train/margins_1": 3.9278757572174072, "rewards_train/margins_2": 4.268280267715454, "step": 564 }, { "epoch": 1.69, "logps_train/policy_1_2": -184.05604553222656, "logps_train/policy_1_l": -161.31690979003906, "logps_train/policy_1_w": -106.92127990722656, "logps_train/policy_2_2": -105.84107971191406, "logps_train/policy_2_w": -169.63934326171875, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -2.3593153953552246, "rewards_train/1-l": -1.8724143505096436, "rewards_train/1-w": 2.1904897689819336, "rewards_train/2-2": 2.311570882797241, "rewards_train/2-w": -2.0022144317626953, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.062904119491577, "rewards_train/margins_1": 4.549805164337158, "rewards_train/margins_2": 4.3137853145599365, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -188.64035034179688, "logps_train/policy_1_l": -131.673583984375, "logps_train/policy_1_w": -100.57723999023438, "logps_train/policy_2_2": -130.2816162109375, "logps_train/policy_2_w": -146.82908630371094, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -115.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.4728244543075562, "rewards_train/1-l": -1.6628673076629639, "rewards_train/1-w": 2.1764559745788574, "rewards_train/2-2": 2.589416980743408, "rewards_train/2-w": -0.8579080104827881, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8393232822418213, "rewards_train/margins_1": 3.6492804288864136, "rewards_train/margins_2": 3.4473249912261963, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -200.86148071289062, "logps_train/policy_1_l": -163.08070373535156, "logps_train/policy_1_w": -124.24684143066406, "logps_train/policy_2_2": -140.46905517578125, "logps_train/policy_2_w": -180.06875610351562, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.9326328039169312, "rewards_train/1-l": -2.104701519012451, "rewards_train/1-w": 2.8150136470794678, "rewards_train/2-2": 3.1530935764312744, "rewards_train/2-w": -0.9806057214736938, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.919715166091919, "rewards_train/margins_1": 3.747646450996399, "rewards_train/margins_2": 4.133699297904968, "step": 565 }, { "epoch": 1.69, "logps_train/policy_1_2": -202.989990234375, "logps_train/policy_1_l": -170.5538330078125, "logps_train/policy_1_w": -132.33465576171875, "logps_train/policy_2_2": -127.37863159179688, "logps_train/policy_2_w": -217.6688232421875, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.6169686317443848, "rewards_train/1-l": -2.0670528411865234, "rewards_train/1-w": 3.263801336288452, "rewards_train/2-2": 3.3006129264831543, "rewards_train/2-w": -1.994225025177002, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.330854177474976, "rewards_train/margins_1": 4.880769968032837, "rewards_train/margins_2": 5.294837951660156, "step": 565 }, { "epoch": 1.69, "learning_rate": 3.2269331687794695e-07, "loss": 0.4786, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -215.35104370117188, "logps_train/policy_1_l": -159.1065673828125, "logps_train/policy_1_w": -110.36480712890625, "logps_train/policy_2_2": -138.09080505371094, "logps_train/policy_2_w": -168.04949951171875, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -2.215376853942871, "rewards_train/1-l": -1.8232779502868652, "rewards_train/1-w": 2.641888380050659, "rewards_train/2-2": 2.8747081756591797, "rewards_train/2-w": -1.2066835165023804, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.465166330337524, "rewards_train/margins_1": 4.85726523399353, "rewards_train/margins_2": 4.08139169216156, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -220.36285400390625, "logps_train/policy_1_l": -119.79316711425781, "logps_train/policy_1_w": -102.30738067626953, "logps_train/policy_2_2": -149.28994750976562, "logps_train/policy_2_w": -152.53790283203125, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -2.3933169841766357, "rewards_train/1-l": -1.8381056785583496, "rewards_train/1-w": 2.8676998615264893, "rewards_train/2-2": 2.6413185596466064, "rewards_train/2-w": -0.7198061943054199, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.705805540084839, "rewards_train/margins_1": 5.261016845703125, "rewards_train/margins_2": 3.3611247539520264, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -199.68695068359375, "logps_train/policy_1_l": -198.89321899414062, "logps_train/policy_1_w": -127.17509460449219, "logps_train/policy_2_2": -134.5811309814453, "logps_train/policy_2_w": -183.38626098632812, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.6003367900848389, "rewards_train/1-l": -2.8285815715789795, "rewards_train/1-w": 2.239180088043213, "rewards_train/2-2": 2.9538984298706055, "rewards_train/2-w": -1.4624528884887695, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.067761659622192, "rewards_train/margins_1": 3.8395168781280518, "rewards_train/margins_2": 4.416351318359375, "step": 566 }, { "epoch": 1.69, "logps_train/policy_1_2": -178.08935546875, "logps_train/policy_1_l": -167.36459350585938, "logps_train/policy_1_w": -117.27861022949219, "logps_train/policy_2_2": -116.33134460449219, "logps_train/policy_2_w": -168.156005859375, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.3530752658843994, "rewards_train/1-l": -2.406381130218506, "rewards_train/1-w": 2.2717480659484863, "rewards_train/2-2": 2.9719433784484863, "rewards_train/2-w": -0.984350323677063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.678129196166992, "rewards_train/margins_1": 3.6248233318328857, "rewards_train/margins_2": 3.9562937021255493, "step": 566 }, { "epoch": 1.7, "logps_train/policy_1_2": -155.94473266601562, "logps_train/policy_1_l": -145.9962158203125, "logps_train/policy_1_w": -86.97502136230469, "logps_train/policy_2_2": -96.51583099365234, "logps_train/policy_2_w": -148.62213134765625, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -1.6341702938079834, "rewards_train/1-l": -2.5448367595672607, "rewards_train/1-w": 2.5860915184020996, "rewards_train/2-2": 2.521268844604492, "rewards_train/2-w": -1.469244122505188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.13092827796936, "rewards_train/margins_1": 4.220261812210083, "rewards_train/margins_2": 3.99051296710968, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -260.86865234375, "logps_train/policy_1_l": -203.84417724609375, "logps_train/policy_1_w": -112.62675476074219, "logps_train/policy_2_2": -174.18760681152344, "logps_train/policy_2_w": -184.90667724609375, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -209.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -2.4353034496307373, "rewards_train/1-l": -2.441596269607544, "rewards_train/1-w": 3.0127148628234863, "rewards_train/2-2": 3.4433486461639404, "rewards_train/2-w": -1.4051204919815063, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.45431113243103, "rewards_train/margins_1": 5.448018312454224, "rewards_train/margins_2": 4.848469138145447, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -204.7669677734375, "logps_train/policy_1_l": -199.52703857421875, "logps_train/policy_1_w": -152.0436553955078, "logps_train/policy_2_2": -140.1323699951172, "logps_train/policy_2_w": -215.96719360351562, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -190.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -1.4705455303192139, "rewards_train/1-l": -2.5141286849975586, "rewards_train/1-w": 3.753446578979492, "rewards_train/2-2": 2.8898873329162598, "rewards_train/2-w": -0.81937575340271, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.267575263977051, "rewards_train/margins_1": 5.223992109298706, "rewards_train/margins_2": 3.7092630863189697, "step": 567 }, { "epoch": 1.7, "logps_train/policy_1_2": -232.7916259765625, "logps_train/policy_1_l": -232.2687225341797, "logps_train/policy_1_w": -140.0186004638672, "logps_train/policy_2_2": -151.4166259765625, "logps_train/policy_2_w": -217.18014526367188, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": -1.7901004552841187, "rewards_train/1-l": -2.8169116973876953, "rewards_train/1-w": 3.221968173980713, "rewards_train/2-2": 3.2192745208740234, "rewards_train/2-w": -1.607077717781067, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.038879871368408, "rewards_train/margins_1": 5.0120686292648315, "rewards_train/margins_2": 4.82635223865509, "step": 567 }, { "epoch": 1.7, "learning_rate": 3.1066264451090816e-07, "loss": 0.6009, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -139.33615112304688, "logps_train/policy_1_l": -72.16368103027344, "logps_train/policy_1_w": -64.29540252685547, "logps_train/policy_2_2": -93.79536437988281, "logps_train/policy_2_w": -103.8297348022461, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -57.5, "logps_train/ref_1_w": -84.0, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -97.5, "rewards_train/1-2": -0.8890831470489502, "rewards_train/1-l": -1.4654405117034912, "rewards_train/1-w": 1.9790537357330322, "rewards_train/2-2": 2.225346803665161, "rewards_train/2-w": -0.62086421251297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.4444942474365234, "rewards_train/margins_1": 2.8681368827819824, "rewards_train/margins_2": 2.846211016178131, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -160.61900329589844, "logps_train/policy_1_l": -153.47555541992188, "logps_train/policy_1_w": -108.77869415283203, "logps_train/policy_2_2": -107.19512939453125, "logps_train/policy_2_w": -187.59100341796875, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.1261579990386963, "rewards_train/1-l": -1.6567356586456299, "rewards_train/1-w": 3.387755870819092, "rewards_train/2-2": 2.3439629077911377, "rewards_train/2-w": -1.7704278230667114, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.044491529464722, "rewards_train/margins_1": 4.513913869857788, "rewards_train/margins_2": 4.114390730857849, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -121.07272338867188, "logps_train/policy_1_l": -187.54788208007812, "logps_train/policy_1_w": -106.70610809326172, "logps_train/policy_2_2": -78.49102020263672, "logps_train/policy_2_w": -163.6271209716797, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -102.5, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -0.4080533981323242, "rewards_train/1-l": -2.475491762161255, "rewards_train/1-w": 2.359076976776123, "rewards_train/2-2": 2.418475866317749, "rewards_train/2-w": -1.2615405321121216, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.834568738937378, "rewards_train/margins_1": 2.7671303749084473, "rewards_train/margins_2": 3.6800163984298706, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -250.76869201660156, "logps_train/policy_1_l": -265.591796875, "logps_train/policy_1_w": -162.93455505371094, "logps_train/policy_2_2": -178.0647430419922, "logps_train/policy_2_w": -224.26626586914062, "logps_train/ref_1_2": -236.0, "logps_train/ref_1_l": -235.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -1.4712066650390625, "rewards_train/1-l": -3.1068358421325684, "rewards_train/1-w": 3.3608412742614746, "rewards_train/2-2": 3.2241909503936768, "rewards_train/2-w": -0.6543612480163574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.467677116394043, "rewards_train/margins_1": 4.832047939300537, "rewards_train/margins_2": 3.878552198410034, "step": 568 }, { "epoch": 1.7, "logps_train/policy_1_2": -220.09803771972656, "logps_train/policy_1_l": -202.069091796875, "logps_train/policy_1_w": -152.24630737304688, "logps_train/policy_2_2": -141.60226440429688, "logps_train/policy_2_w": -239.653076171875, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -188.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -1.6062874794006348, "rewards_train/1-l": -2.2608141899108887, "rewards_train/1-w": 3.6101346015930176, "rewards_train/2-2": 3.4469521045684814, "rewards_train/2-w": -1.8926525115966797, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.870948791503906, "rewards_train/margins_1": 5.216422080993652, "rewards_train/margins_2": 5.339604616165161, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -227.5106658935547, "logps_train/policy_1_l": -166.43606567382812, "logps_train/policy_1_w": -113.58029174804688, "logps_train/policy_2_2": -134.37249755859375, "logps_train/policy_2_w": -197.09341430664062, "logps_train/ref_1_2": -202.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -2.5756752490997314, "rewards_train/1-l": -2.2201685905456543, "rewards_train/1-w": 3.077908515930176, "rewards_train/2-2": 3.5299367904663086, "rewards_train/2-w": -2.0601229667663574, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.29807710647583, "rewards_train/margins_1": 5.653583765029907, "rewards_train/margins_2": 5.590059757232666, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -221.0300750732422, "logps_train/policy_1_l": -194.01412963867188, "logps_train/policy_1_w": -167.11697387695312, "logps_train/policy_2_2": -160.929931640625, "logps_train/policy_2_w": -223.86572265625, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -0.6655080318450928, "rewards_train/1-l": -2.3728976249694824, "rewards_train/1-w": 3.292207717895508, "rewards_train/2-2": 3.379664182662964, "rewards_train/2-w": -0.43579238653182983, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.66510534286499, "rewards_train/margins_1": 3.9577157497406006, "rewards_train/margins_2": 3.8154565691947937, "step": 569 }, { "epoch": 1.7, "logps_train/policy_1_2": -218.01382446289062, "logps_train/policy_1_l": -167.50970458984375, "logps_train/policy_1_w": -125.76872253417969, "logps_train/policy_2_2": -130.28419494628906, "logps_train/policy_2_w": -210.81690979003906, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -2.690444231033325, "rewards_train/1-l": -2.0683534145355225, "rewards_train/1-w": 4.47527551651001, "rewards_train/2-2": 3.139158248901367, "rewards_train/2-w": -1.054543137550354, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.543628931045532, "rewards_train/margins_1": 7.165719747543335, "rewards_train/margins_2": 4.193701386451721, "step": 569 }, { "epoch": 1.71, "learning_rate": 2.9884564761020083e-07, "loss": 0.649, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -199.0902862548828, "logps_train/policy_1_l": -191.53993225097656, "logps_train/policy_1_w": -119.43777465820312, "logps_train/policy_2_2": -134.71163940429688, "logps_train/policy_2_w": -186.9031982421875, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.4492627382278442, "rewards_train/1-l": -2.2977919578552246, "rewards_train/1-w": 3.0473361015319824, "rewards_train/2-2": 2.7899699211120605, "rewards_train/2-w": -1.2801631689071655, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.345128059387207, "rewards_train/margins_1": 4.496598839759827, "rewards_train/margins_2": 4.070133090019226, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -237.89962768554688, "logps_train/policy_1_l": -180.9617919921875, "logps_train/policy_1_w": -145.8288116455078, "logps_train/policy_2_2": -154.8181915283203, "logps_train/policy_2_w": -244.74497985839844, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -1.9477746486663818, "rewards_train/1-l": -1.3078985214233398, "rewards_train/1-w": 3.766338348388672, "rewards_train/2-2": 3.191227674484253, "rewards_train/2-w": -2.3577003479003906, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.074236869812012, "rewards_train/margins_1": 5.714112997055054, "rewards_train/margins_2": 5.5489280223846436, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -208.43638610839844, "logps_train/policy_1_l": -171.42037963867188, "logps_train/policy_1_w": -158.91380310058594, "logps_train/policy_2_2": -139.4566650390625, "logps_train/policy_2_w": -230.93048095703125, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -1.4810400009155273, "rewards_train/1-l": -1.9694982767105103, "rewards_train/1-w": 3.626051187515259, "rewards_train/2-2": 3.3027710914611816, "rewards_train/2-w": -1.3619930744171143, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.595549464225769, "rewards_train/margins_1": 5.107091188430786, "rewards_train/margins_2": 4.664764165878296, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -138.237060546875, "logps_train/policy_1_l": -114.98117065429688, "logps_train/policy_1_w": -72.59481811523438, "logps_train/policy_2_2": -79.10173797607422, "logps_train/policy_2_w": -111.96463775634766, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -90.0, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -104.0, "rewards_train/1-2": -1.3787851333618164, "rewards_train/1-l": -2.336202621459961, "rewards_train/1-w": 1.7309482097625732, "rewards_train/2-2": 2.9433417320251465, "rewards_train/2-w": -0.7991982698440552, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.067150831222534, "rewards_train/margins_1": 3.1097333431243896, "rewards_train/margins_2": 3.7425400018692017, "step": 570 }, { "epoch": 1.71, "logps_train/policy_1_2": -261.16668701171875, "logps_train/policy_1_l": -207.99029541015625, "logps_train/policy_1_w": -122.35908508300781, "logps_train/policy_2_2": -161.76528930664062, "logps_train/policy_2_w": -204.66404724121094, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -3.113544464111328, "rewards_train/1-l": -2.211822032928467, "rewards_train/1-w": 3.1100146770477295, "rewards_train/2-2": 3.707063674926758, "rewards_train/2-w": -2.196774959564209, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.321836709976196, "rewards_train/margins_1": 6.223559141159058, "rewards_train/margins_2": 5.903838634490967, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -169.52755737304688, "logps_train/policy_1_l": -193.80813598632812, "logps_train/policy_1_w": -165.29177856445312, "logps_train/policy_2_2": -108.04064178466797, "logps_train/policy_2_w": -237.33413696289062, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.016037940979004, "rewards_train/1-l": -2.272219657897949, "rewards_train/1-w": 4.245821952819824, "rewards_train/2-2": 3.0564823150634766, "rewards_train/2-w": -0.600601851940155, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 6.518041610717773, "rewards_train/margins_1": 5.261859893798828, "rewards_train/margins_2": 3.6570841670036316, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -243.31285095214844, "logps_train/policy_1_l": -226.1219024658203, "logps_train/policy_1_w": -139.03512573242188, "logps_train/policy_2_2": -141.655029296875, "logps_train/policy_2_w": -250.2337188720703, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -172.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -3.0953469276428223, "rewards_train/1-l": -1.8801597356796265, "rewards_train/1-w": 3.278517723083496, "rewards_train/2-2": 3.096996784210205, "rewards_train/2-w": -3.081186294555664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.158677458763123, "rewards_train/margins_1": 6.373864650726318, "rewards_train/margins_2": 6.178183078765869, "step": 571 }, { "epoch": 1.71, "logps_train/policy_1_2": -234.6195068359375, "logps_train/policy_1_l": -267.637939453125, "logps_train/policy_1_w": -188.59097290039062, "logps_train/policy_2_2": -170.58961486816406, "logps_train/policy_2_w": -273.6601257324219, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -239.0, "logps_train/ref_1_w": -235.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -268.0, "rewards_train/1-2": -0.43851348757743835, "rewards_train/1-l": -2.8200435638427734, "rewards_train/1-w": 4.645589351654053, "rewards_train/2-2": 3.9558823108673096, "rewards_train/2-w": -0.5394511222839355, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 7.465632915496826, "rewards_train/margins_1": 5.084102839231491, "rewards_train/margins_2": 4.495333433151245, "step": 571 }, { "epoch": 1.71, "learning_rate": 2.8724347949389056e-07, "loss": 0.4818, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -153.87164306640625, "logps_train/policy_1_l": -151.01510620117188, "logps_train/policy_1_w": -89.14295959472656, "logps_train/policy_2_2": -101.3592300415039, "logps_train/policy_2_w": -140.9151611328125, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -116.5, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -1.1891167163848877, "rewards_train/1-l": -3.0363383293151855, "rewards_train/1-w": 2.7165634632110596, "rewards_train/2-2": 2.3890769481658936, "rewards_train/2-w": -0.880968451499939, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.752901792526245, "rewards_train/margins_1": 3.9056801795959473, "rewards_train/margins_2": 3.2700453996658325, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -254.90631103515625, "logps_train/policy_1_l": -227.61770629882812, "logps_train/policy_1_w": -139.52804565429688, "logps_train/policy_2_2": -154.48036193847656, "logps_train/policy_2_w": -226.52972412109375, "logps_train/ref_1_2": -228.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -2.6921932697296143, "rewards_train/1-l": -2.3426294326782227, "rewards_train/1-w": 3.5354771614074707, "rewards_train/2-2": 3.5234477519989014, "rewards_train/2-w": -1.941253900527954, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.878106594085693, "rewards_train/margins_1": 6.227670431137085, "rewards_train/margins_2": 5.4647016525268555, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -185.00721740722656, "logps_train/policy_1_l": -151.98297119140625, "logps_train/policy_1_w": -126.87095642089844, "logps_train/policy_2_2": -113.13356018066406, "logps_train/policy_2_w": -208.166015625, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.4495489597320557, "rewards_train/1-l": -2.5268125534057617, "rewards_train/1-w": 3.015052080154419, "rewards_train/2-2": 3.0188705921173096, "rewards_train/2-w": -1.8279297351837158, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.541864633560181, "rewards_train/margins_1": 4.464601039886475, "rewards_train/margins_2": 4.846800327301025, "step": 572 }, { "epoch": 1.71, "logps_train/policy_1_2": -164.46780395507812, "logps_train/policy_1_l": -150.7415313720703, "logps_train/policy_1_w": -118.09620666503906, "logps_train/policy_2_2": -104.15191650390625, "logps_train/policy_2_w": -191.2451934814453, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.169435977935791, "rewards_train/1-l": -2.1407556533813477, "rewards_train/1-w": 3.366941452026367, "rewards_train/2-2": 2.8088321685791016, "rewards_train/2-w": -1.5559650659561157, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.507697105407715, "rewards_train/margins_1": 4.536377429962158, "rewards_train/margins_2": 4.364797234535217, "step": 572 }, { "epoch": 1.72, "logps_train/policy_1_2": -177.85311889648438, "logps_train/policy_1_l": -178.07398986816406, "logps_train/policy_1_w": -118.56546783447266, "logps_train/policy_2_2": -113.6548080444336, "logps_train/policy_2_w": -189.89987182617188, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.1739842891693115, "rewards_train/1-l": -2.355055809020996, "rewards_train/1-w": 3.371577739715576, "rewards_train/2-2": 2.7567849159240723, "rewards_train/2-w": -1.542330026626587, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.726633548736572, "rewards_train/margins_1": 4.545562028884888, "rewards_train/margins_2": 4.299114942550659, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -223.32180786132812, "logps_train/policy_1_l": -232.8866729736328, "logps_train/policy_1_w": -138.48655700683594, "logps_train/policy_2_2": -137.6820068359375, "logps_train/policy_2_w": -234.5611114501953, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -2.461869239807129, "rewards_train/1-l": -2.781721353530884, "rewards_train/1-w": 3.1973533630371094, "rewards_train/2-2": 2.9489879608154297, "rewards_train/2-w": -2.8397653102874756, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.979074716567993, "rewards_train/margins_1": 5.659222602844238, "rewards_train/margins_2": 5.788753271102905, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -206.09568786621094, "logps_train/policy_1_l": -244.834228515625, "logps_train/policy_1_w": -167.09129333496094, "logps_train/policy_2_2": -127.85692596435547, "logps_train/policy_2_w": -259.3270568847656, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -208.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -238.0, "rewards_train/1-2": -1.7595677375793457, "rewards_train/1-l": -2.7365002632141113, "rewards_train/1-w": 4.048684120178223, "rewards_train/2-2": 3.532276153564453, "rewards_train/2-w": -2.1448161602020264, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.785184383392334, "rewards_train/margins_1": 5.808251857757568, "rewards_train/margins_2": 5.6770923137664795, "step": 573 }, { "epoch": 1.72, "logps_train/policy_1_2": -204.23094177246094, "logps_train/policy_1_l": -164.6583251953125, "logps_train/policy_1_w": -111.30530548095703, "logps_train/policy_2_2": -130.97970581054688, "logps_train/policy_2_w": -174.2660369873047, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.9889147281646729, "rewards_train/1-l": -2.0383920669555664, "rewards_train/1-w": 2.832213878631592, "rewards_train/2-2": 3.1539816856384277, "rewards_train/2-w": -1.1591236591339111, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.870605945587158, "rewards_train/margins_1": 4.821128606796265, "rewards_train/margins_2": 4.313105344772339, "step": 573 }, { "epoch": 1.72, "learning_rate": 2.7585727251313196e-07, "loss": 0.483, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -176.7192840576172, "logps_train/policy_1_l": -156.04690551757812, "logps_train/policy_1_w": -125.61197662353516, "logps_train/policy_2_2": -116.61029815673828, "logps_train/policy_2_w": -204.10791015625, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.0988821983337402, "rewards_train/1-l": -1.9732450246810913, "rewards_train/1-w": 3.4364590644836426, "rewards_train/2-2": 2.661430835723877, "rewards_train/2-w": -1.710009217262268, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.409704089164734, "rewards_train/margins_1": 4.535341262817383, "rewards_train/margins_2": 4.371440052986145, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -301.537353515625, "logps_train/policy_1_l": -209.86676025390625, "logps_train/policy_1_w": -131.46995544433594, "logps_train/policy_2_2": -194.10470581054688, "logps_train/policy_2_w": -226.24221801757812, "logps_train/ref_1_2": -262.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -234.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -3.8894758224487305, "rewards_train/1-l": -2.86336088180542, "rewards_train/1-w": 3.620582342147827, "rewards_train/2-2": 3.959207773208618, "rewards_train/2-w": -2.282815456390381, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.483943223953247, "rewards_train/margins_1": 7.510058164596558, "rewards_train/margins_2": 6.242023229598999, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -213.4896240234375, "logps_train/policy_1_l": -169.43460083007812, "logps_train/policy_1_w": -177.01402282714844, "logps_train/policy_2_2": -150.62310791015625, "logps_train/policy_2_w": -243.2403564453125, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -212.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.010681390762329, "rewards_train/1-l": -2.023637056350708, "rewards_train/1-w": 3.496840238571167, "rewards_train/2-2": 3.5205016136169434, "rewards_train/2-w": -1.2236449718475342, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.520477294921875, "rewards_train/margins_1": 4.507521629333496, "rewards_train/margins_2": 4.7441465854644775, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -141.95611572265625, "logps_train/policy_1_l": -123.66602325439453, "logps_train/policy_1_w": -62.16881561279297, "logps_train/policy_2_2": -86.65185546875, "logps_train/policy_2_w": -98.291748046875, "logps_train/ref_1_2": -127.5, "logps_train/ref_1_l": -97.5, "logps_train/ref_1_w": -78.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -92.0, "rewards_train/1-2": -1.4653372764587402, "rewards_train/1-l": -2.611598253250122, "rewards_train/1-w": 1.6547980308532715, "rewards_train/2-2": 2.2989745140075684, "rewards_train/2-w": -0.6326898336410522, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.2663962841033936, "rewards_train/margins_1": 3.1201353073120117, "rewards_train/margins_2": 2.9316643476486206, "step": 574 }, { "epoch": 1.72, "logps_train/policy_1_2": -164.46780395507812, "logps_train/policy_1_l": -191.0868682861328, "logps_train/policy_1_w": -107.7862548828125, "logps_train/policy_2_2": -106.97933959960938, "logps_train/policy_2_w": -167.82565307617188, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -1.2868201732635498, "rewards_train/1-l": -3.401264190673828, "rewards_train/1-w": 2.426453113555908, "rewards_train/2-2": 2.278043270111084, "rewards_train/2-w": -1.7966272830963135, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.827717304229736, "rewards_train/margins_1": 3.713273286819458, "rewards_train/margins_2": 4.0746705532073975, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -170.5421142578125, "logps_train/policy_1_l": -170.15264892578125, "logps_train/policy_1_w": -118.62063598632812, "logps_train/policy_2_2": -99.28257751464844, "logps_train/policy_2_w": -189.4932098388672, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.7616345882415771, "rewards_train/1-l": -2.0287654399871826, "rewards_train/1-w": 3.0840301513671875, "rewards_train/2-2": 2.9002573490142822, "rewards_train/2-w": -1.7641650438308716, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.11279559135437, "rewards_train/margins_1": 4.845664739608765, "rewards_train/margins_2": 4.664422392845154, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -197.21307373046875, "logps_train/policy_1_l": -179.80238342285156, "logps_train/policy_1_w": -88.7930908203125, "logps_train/policy_2_2": -117.23286437988281, "logps_train/policy_2_w": -159.10357666015625, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -2.4002132415771484, "rewards_train/1-l": -3.26138973236084, "rewards_train/1-w": 2.595104694366455, "rewards_train/2-2": 2.9278852939605713, "rewards_train/2-w": -1.9341861009597778, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.856494426727295, "rewards_train/margins_1": 4.9953179359436035, "rewards_train/margins_2": 4.862071394920349, "step": 575 }, { "epoch": 1.72, "logps_train/policy_1_2": -153.9635009765625, "logps_train/policy_1_l": -135.98959350585938, "logps_train/policy_1_w": -79.86909484863281, "logps_train/policy_2_2": -90.06402587890625, "logps_train/policy_2_w": -136.23074340820312, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -122.0, "rewards_train/1-2": -2.0168590545654297, "rewards_train/1-l": -2.4168643951416016, "rewards_train/1-w": 2.2396953105926514, "rewards_train/2-2": 2.656878709793091, "rewards_train/2-w": -1.396219253540039, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.656559705734253, "rewards_train/margins_1": 4.256554365158081, "rewards_train/margins_2": 4.05309796333313, "step": 575 }, { "epoch": 1.72, "learning_rate": 2.6468813794165356e-07, "loss": 0.6197, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -224.36029052734375, "logps_train/policy_1_l": -203.03855895996094, "logps_train/policy_1_w": -152.39517211914062, "logps_train/policy_2_2": -156.5690155029297, "logps_train/policy_2_w": -226.80429077148438, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -189.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.3727463483810425, "rewards_train/1-l": -2.495603084564209, "rewards_train/1-w": 3.397007465362549, "rewards_train/2-2": 3.2188806533813477, "rewards_train/2-w": -1.276033878326416, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.892610549926758, "rewards_train/margins_1": 4.769753813743591, "rewards_train/margins_2": 4.494914531707764, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -197.53427124023438, "logps_train/policy_1_l": -143.2041015625, "logps_train/policy_1_w": -112.77546691894531, "logps_train/policy_2_2": -118.5191421508789, "logps_train/policy_2_w": -194.0872802734375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -2.1428322792053223, "rewards_train/1-l": -2.0221667289733887, "rewards_train/1-w": 3.1419849395751953, "rewards_train/2-2": 2.772841453552246, "rewards_train/2-w": -2.071815252304077, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.164151668548584, "rewards_train/margins_1": 5.284817218780518, "rewards_train/margins_2": 4.844656705856323, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -261.97222900390625, "logps_train/policy_1_l": -242.92803955078125, "logps_train/policy_1_w": -125.9776611328125, "logps_train/policy_2_2": -179.8754425048828, "logps_train/policy_2_w": -196.95291137695312, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -217.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -2.080427646636963, "rewards_train/1-l": -2.620635986328125, "rewards_train/1-w": 3.3729374408721924, "rewards_train/2-2": 3.6511282920837402, "rewards_train/2-w": -1.1827912330627441, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.993573427200317, "rewards_train/margins_1": 5.453365087509155, "rewards_train/margins_2": 4.833919525146484, "step": 576 }, { "epoch": 1.72, "logps_train/policy_1_2": -180.36126708984375, "logps_train/policy_1_l": -141.59625244140625, "logps_train/policy_1_w": -97.95161437988281, "logps_train/policy_2_2": -112.47137451171875, "logps_train/policy_2_w": -148.97323608398438, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.8706978559494019, "rewards_train/1-l": -2.0903868675231934, "rewards_train/1-w": 2.2230029106140137, "rewards_train/2-2": 2.6329410076141357, "rewards_train/2-w": -1.0319925546646118, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.313389778137207, "rewards_train/margins_1": 4.0937007665634155, "rewards_train/margins_2": 3.6649335622787476, "step": 576 }, { "epoch": 1.73, "logps_train/policy_1_2": -214.4267120361328, "logps_train/policy_1_l": -194.9765625, "logps_train/policy_1_w": -174.37045288085938, "logps_train/policy_2_2": -147.28060913085938, "logps_train/policy_2_w": -246.06857299804688, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -206.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -1.563765048980713, "rewards_train/1-l": -1.8253414630889893, "rewards_train/1-w": 3.119596242904663, "rewards_train/2-2": 2.988736152648926, "rewards_train/2-w": -1.6991431713104248, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.944937705993652, "rewards_train/margins_1": 4.683361291885376, "rewards_train/margins_2": 4.687879323959351, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -178.89524841308594, "logps_train/policy_1_l": -166.60646057128906, "logps_train/policy_1_w": -136.997314453125, "logps_train/policy_2_2": -112.677734375, "logps_train/policy_2_w": -221.29586791992188, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.7512437105178833, "rewards_train/1-l": -1.8721688985824585, "rewards_train/1-w": 3.303393840789795, "rewards_train/2-2": 2.62148380279541, "rewards_train/2-w": -2.4081034660339355, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.175562739372253, "rewards_train/margins_1": 5.054637551307678, "rewards_train/margins_2": 5.029587268829346, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -161.15049743652344, "logps_train/policy_1_l": -168.36181640625, "logps_train/policy_1_w": -115.75870513916016, "logps_train/policy_2_2": -107.2393798828125, "logps_train/policy_2_w": -179.22315979003906, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.7646589875221252, "rewards_train/1-l": -2.7942867279052734, "rewards_train/1-w": 3.1499104499816895, "rewards_train/2-2": 2.7787957191467285, "rewards_train/2-w": -0.9293470978736877, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.944197177886963, "rewards_train/margins_1": 3.9145694375038147, "rewards_train/margins_2": 3.7081428170204163, "step": 577 }, { "epoch": 1.73, "logps_train/policy_1_2": -102.65217590332031, "logps_train/policy_1_l": -128.05101013183594, "logps_train/policy_1_w": -104.98770141601562, "logps_train/policy_2_2": -66.8355712890625, "logps_train/policy_2_w": -156.69497680664062, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -106.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -88.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.2734205722808838, "rewards_train/1-l": -2.1509509086608887, "rewards_train/1-w": 2.631894826889038, "rewards_train/2-2": 2.1121461391448975, "rewards_train/2-w": -1.0976231098175049, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.782845735549927, "rewards_train/margins_1": 2.905315399169922, "rewards_train/margins_2": 3.2097692489624023, "step": 577 }, { "epoch": 1.73, "learning_rate": 2.5373716586730047e-07, "loss": 0.5639, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -238.63011169433594, "logps_train/policy_1_l": -209.38626098632812, "logps_train/policy_1_w": -145.17291259765625, "logps_train/policy_2_2": -139.57772827148438, "logps_train/policy_2_w": -244.50697326660156, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -2.6801981925964355, "rewards_train/1-l": -2.9078030586242676, "rewards_train/1-w": 3.144428014755249, "rewards_train/2-2": 3.688809871673584, "rewards_train/2-w": -2.4487435817718506, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.052231073379517, "rewards_train/margins_1": 5.824626207351685, "rewards_train/margins_2": 6.137553453445435, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -178.65264892578125, "logps_train/policy_1_l": -156.30706787109375, "logps_train/policy_1_w": -103.04219818115234, "logps_train/policy_2_2": -111.86943817138672, "logps_train/policy_2_w": -164.43052673339844, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.8472974300384521, "rewards_train/1-l": -2.3173277378082275, "rewards_train/1-w": 2.454472064971924, "rewards_train/2-2": 2.4443063735961914, "rewards_train/2-w": -1.6876816749572754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.771799802780151, "rewards_train/margins_1": 4.301769495010376, "rewards_train/margins_2": 4.131988048553467, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -204.3058624267578, "logps_train/policy_1_l": -207.5616455078125, "logps_train/policy_1_w": -143.65771484375, "logps_train/policy_2_2": -128.66014099121094, "logps_train/policy_2_w": -240.13693237304688, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -182.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.9778516292572021, "rewards_train/1-l": -3.102647066116333, "rewards_train/1-w": 3.8061044216156006, "rewards_train/2-2": 2.8667988777160645, "rewards_train/2-w": -2.605100393295288, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.908751487731934, "rewards_train/margins_1": 5.783956050872803, "rewards_train/margins_2": 5.4718992710113525, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -96.31523132324219, "logps_train/policy_1_l": -105.37127685546875, "logps_train/policy_1_w": -63.805625915527344, "logps_train/policy_2_2": -56.592872619628906, "logps_train/policy_2_w": -109.52771759033203, "logps_train/ref_1_2": -87.0, "logps_train/ref_1_l": -86.5, "logps_train/ref_1_w": -82.5, "logps_train/ref_2_2": -73.0, "logps_train/ref_2_w": -96.5, "rewards_train/1-2": -0.9385550022125244, "rewards_train/1-l": -1.8980531692504883, "rewards_train/1-w": 1.8815470933914185, "rewards_train/2-2": 1.656826138496399, "rewards_train/2-w": -1.2884167432785034, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.7796002626419067, "rewards_train/margins_1": 2.820102095603943, "rewards_train/margins_2": 2.9452428817749023, "step": 578 }, { "epoch": 1.73, "logps_train/policy_1_2": -192.87738037109375, "logps_train/policy_1_l": -180.38070678710938, "logps_train/policy_1_w": -129.35409545898438, "logps_train/policy_2_2": -115.625732421875, "logps_train/policy_2_w": -197.43374633789062, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -2.1678154468536377, "rewards_train/1-l": -2.4162211418151855, "rewards_train/1-w": 3.16986346244812, "rewards_train/2-2": 2.8932859897613525, "rewards_train/2-w": -1.2441561222076416, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.586084604263306, "rewards_train/margins_1": 5.337678909301758, "rewards_train/margins_2": 4.137442111968994, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -157.29043579101562, "logps_train/policy_1_l": -181.11172485351562, "logps_train/policy_1_w": -121.90835571289062, "logps_train/policy_2_2": -97.36587524414062, "logps_train/policy_2_w": -199.91107177734375, "logps_train/ref_1_2": -142.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.5790437459945679, "rewards_train/1-l": -2.854337215423584, "rewards_train/1-w": 2.801352024078369, "rewards_train/2-2": 2.4194674491882324, "rewards_train/2-w": -2.1204049587249756, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.655689239501953, "rewards_train/margins_1": 4.380395770072937, "rewards_train/margins_2": 4.539872407913208, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -208.48477172851562, "logps_train/policy_1_l": -167.7003936767578, "logps_train/policy_1_w": -87.66293334960938, "logps_train/policy_2_2": -126.83102416992188, "logps_train/policy_2_w": -140.46826171875, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -132.0, "rewards_train/1-2": -2.7621493339538574, "rewards_train/1-l": -2.910860061645508, "rewards_train/1-w": 2.732534885406494, "rewards_train/2-2": 3.042287826538086, "rewards_train/2-w": -0.8280768394470215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.643394947052002, "rewards_train/margins_1": 5.494684219360352, "rewards_train/margins_2": 3.8703646659851074, "step": 579 }, { "epoch": 1.73, "logps_train/policy_1_2": -220.08447265625, "logps_train/policy_1_l": -205.3925323486328, "logps_train/policy_1_w": -146.47117614746094, "logps_train/policy_2_2": -136.3584442138672, "logps_train/policy_2_w": -244.6367645263672, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -217.0, "rewards_train/1-2": -2.6834471225738525, "rewards_train/1-l": -2.2251906394958496, "rewards_train/1-w": 3.450929641723633, "rewards_train/2-2": 2.8883748054504395, "rewards_train/2-w": -2.763676166534424, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.676120281219482, "rewards_train/margins_1": 6.134376764297485, "rewards_train/margins_2": 5.652050971984863, "step": 579 }, { "epoch": 1.74, "learning_rate": 2.430054250856412e-07, "loss": 0.5888, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -212.4072723388672, "logps_train/policy_1_l": -213.71151733398438, "logps_train/policy_1_w": -111.73226928710938, "logps_train/policy_2_2": -133.76870727539062, "logps_train/policy_2_w": -185.61032104492188, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.8001017570495605, "rewards_train/1-l": -3.002621650695801, "rewards_train/1-w": 2.803334951400757, "rewards_train/2-2": 3.1684417724609375, "rewards_train/2-w": -1.7872037887573242, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.805956602096558, "rewards_train/margins_1": 4.603436708450317, "rewards_train/margins_2": 4.955645561218262, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -172.8955078125, "logps_train/policy_1_l": -177.70050048828125, "logps_train/policy_1_w": -153.77267456054688, "logps_train/policy_2_2": -119.30664825439453, "logps_train/policy_2_w": -221.3663330078125, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -1.1065418720245361, "rewards_train/1-l": -2.6378235816955566, "rewards_train/1-w": 2.89524245262146, "rewards_train/2-2": 2.6177730560302734, "rewards_train/2-w": -1.7469861507415771, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.533066034317017, "rewards_train/margins_1": 4.001784324645996, "rewards_train/margins_2": 4.364759206771851, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -168.45425415039062, "logps_train/policy_1_l": -151.61654663085938, "logps_train/policy_1_w": -87.70339965820312, "logps_train/policy_2_2": -108.48794555664062, "logps_train/policy_2_w": -152.32810974121094, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -112.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.50089430809021, "rewards_train/1-l": -2.3055756092071533, "rewards_train/1-w": 2.42565655708313, "rewards_train/2-2": 2.521811008453369, "rewards_train/2-w": -1.3939440250396729, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.731232166290283, "rewards_train/margins_1": 3.92655086517334, "rewards_train/margins_2": 3.915755033493042, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -167.35067749023438, "logps_train/policy_1_l": -201.44439697265625, "logps_train/policy_1_w": -140.6402130126953, "logps_train/policy_2_2": -105.47584533691406, "logps_train/policy_2_w": -220.83389282226562, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -1.5881929397583008, "rewards_train/1-l": -2.3350648880004883, "rewards_train/1-w": 2.966252088546753, "rewards_train/2-2": 2.479661226272583, "rewards_train/2-w": -2.199502468109131, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.301316976547241, "rewards_train/margins_1": 4.554445028305054, "rewards_train/margins_2": 4.679163694381714, "step": 580 }, { "epoch": 1.74, "logps_train/policy_1_2": -242.2711181640625, "logps_train/policy_1_l": -203.60980224609375, "logps_train/policy_1_w": -140.69529724121094, "logps_train/policy_2_2": -161.17282104492188, "logps_train/policy_2_w": -206.08187866210938, "logps_train/ref_1_2": -219.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -2.2954721450805664, "rewards_train/1-l": -2.7582688331604004, "rewards_train/1-w": 2.7945566177368164, "rewards_train/2-2": 3.224905252456665, "rewards_train/2-w": -1.3921719789505005, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.552825450897217, "rewards_train/margins_1": 5.090028762817383, "rewards_train/margins_2": 4.6170772314071655, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -234.52011108398438, "logps_train/policy_1_l": -167.49478149414062, "logps_train/policy_1_w": -118.14107513427734, "logps_train/policy_2_2": -156.3633270263672, "logps_train/policy_2_w": -182.22943115234375, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.6262297630310059, "rewards_train/1-l": -1.6397136449813843, "rewards_train/1-w": 3.1548376083374023, "rewards_train/2-2": 3.5960888862609863, "rewards_train/2-w": -0.9608340263366699, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.794551253318787, "rewards_train/margins_1": 4.781067371368408, "rewards_train/margins_2": 4.556922912597656, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -197.88729858398438, "logps_train/policy_1_l": -144.50393676757812, "logps_train/policy_1_w": -111.16991424560547, "logps_train/policy_2_2": -123.31512451171875, "logps_train/policy_2_w": -183.885009765625, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -2.2680273056030273, "rewards_train/1-l": -2.352346420288086, "rewards_train/1-w": 2.3739261627197266, "rewards_train/2-2": 2.73628568649292, "rewards_train/2-w": -2.2556891441345215, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.7262725830078125, "rewards_train/margins_1": 4.641953468322754, "rewards_train/margins_2": 4.991974830627441, "step": 581 }, { "epoch": 1.74, "logps_train/policy_1_2": -206.85816955566406, "logps_train/policy_1_l": -220.08229064941406, "logps_train/policy_1_w": -167.10507202148438, "logps_train/policy_2_2": -138.84494018554688, "logps_train/policy_2_w": -256.9453125, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": -1.859351634979248, "rewards_train/1-l": -2.5396015644073486, "rewards_train/1-w": 3.119840621948242, "rewards_train/2-2": 2.4659945964813232, "rewards_train/2-w": -2.4341797828674316, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.659442186355591, "rewards_train/margins_1": 4.97919225692749, "rewards_train/margins_2": 4.900174379348755, "step": 581 }, { "epoch": 1.74, "learning_rate": 2.3249396299565685e-07, "loss": 0.5623, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -208.1444854736328, "logps_train/policy_1_l": -185.18685913085938, "logps_train/policy_1_w": -150.8706817626953, "logps_train/policy_2_2": -132.49180603027344, "logps_train/policy_2_w": -226.8857421875, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.9718705415725708, "rewards_train/1-l": -2.349740982055664, "rewards_train/1-w": 3.2344164848327637, "rewards_train/2-2": 3.0121474266052246, "rewards_train/2-w": -2.0352542400360107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.584157466888428, "rewards_train/margins_1": 5.2062870264053345, "rewards_train/margins_2": 5.047401666641235, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -187.9217987060547, "logps_train/policy_1_l": -200.85003662109375, "logps_train/policy_1_w": -145.12033081054688, "logps_train/policy_2_2": -119.09508514404297, "logps_train/policy_2_w": -226.38723754882812, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -1.564446210861206, "rewards_train/1-l": -2.510784864425659, "rewards_train/1-w": 3.4461708068847656, "rewards_train/2-2": 2.8897104263305664, "rewards_train/2-w": -1.9090359210968018, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.956955671310425, "rewards_train/margins_1": 5.010617017745972, "rewards_train/margins_2": 4.798746347427368, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -212.85256958007812, "logps_train/policy_1_l": -230.48318481445312, "logps_train/policy_1_w": -152.76385498046875, "logps_train/policy_2_2": -146.17918395996094, "logps_train/policy_2_w": -212.38131713867188, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -178.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.5352568626403809, "rewards_train/1-l": -3.2338645458221436, "rewards_train/1-w": 3.116583824157715, "rewards_train/2-2": 3.1531753540039062, "rewards_train/2-w": -0.9561002850532532, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.350448369979858, "rewards_train/margins_1": 4.651840686798096, "rewards_train/margins_2": 4.109275639057159, "step": 582 }, { "epoch": 1.74, "logps_train/policy_1_2": -165.9860382080078, "logps_train/policy_1_l": -171.35562133789062, "logps_train/policy_1_w": -85.84373474121094, "logps_train/policy_2_2": -103.37626647949219, "logps_train/policy_2_w": -136.13893127441406, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -126.5, "rewards_train/1-2": -1.419111728668213, "rewards_train/1-l": -2.3290674686431885, "rewards_train/1-w": 2.564455032348633, "rewards_train/2-2": 2.271846294403076, "rewards_train/2-w": -0.9446553587913513, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.893522500991821, "rewards_train/margins_1": 3.9835667610168457, "rewards_train/margins_2": 3.2165016531944275, "step": 582 }, { "epoch": 1.75, "logps_train/policy_1_2": -192.4402618408203, "logps_train/policy_1_l": -176.7232208251953, "logps_train/policy_1_w": -130.40512084960938, "logps_train/policy_2_2": -116.89999389648438, "logps_train/policy_2_w": -215.2236328125, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -2.0744950771331787, "rewards_train/1-l": -2.412897825241089, "rewards_train/1-w": 2.8505032062530518, "rewards_train/2-2": 2.7928128242492676, "rewards_train/2-w": -2.449120283126831, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.263401031494141, "rewards_train/margins_1": 4.9249982833862305, "rewards_train/margins_2": 5.241933107376099, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -260.5790100097656, "logps_train/policy_1_l": -182.97000122070312, "logps_train/policy_1_w": -142.4931182861328, "logps_train/policy_2_2": -180.5614471435547, "logps_train/policy_2_w": -203.53494262695312, "logps_train/ref_1_2": -246.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -1.4063396453857422, "rewards_train/1-l": -2.3747353553771973, "rewards_train/1-w": 3.200688362121582, "rewards_train/2-2": 3.928229808807373, "rewards_train/2-w": -0.8456823229789734, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.575423717498779, "rewards_train/margins_1": 4.607028007507324, "rewards_train/margins_2": 4.773912131786346, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -172.92213439941406, "logps_train/policy_1_l": -186.21405029296875, "logps_train/policy_1_w": -106.20774841308594, "logps_train/policy_2_2": -107.05118560791016, "logps_train/policy_2_w": -175.9995880126953, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.853834867477417, "rewards_train/1-l": -2.7630550861358643, "rewards_train/1-w": 2.4728779792785645, "rewards_train/2-2": 2.546834945678711, "rewards_train/2-w": -2.0312082767486572, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.235933065414429, "rewards_train/margins_1": 4.3267128467559814, "rewards_train/margins_2": 4.578043222427368, "step": 583 }, { "epoch": 1.75, "logps_train/policy_1_2": -150.04136657714844, "logps_train/policy_1_l": -114.2253646850586, "logps_train/policy_1_w": -91.15299987792969, "logps_train/policy_2_2": -88.4225082397461, "logps_train/policy_2_w": -154.78407287597656, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -116.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.331480622291565, "rewards_train/1-l": -2.275026559829712, "rewards_train/1-w": 3.3206372261047363, "rewards_train/2-2": 2.800717830657959, "rewards_train/2-w": -0.926845371723175, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.595663785934448, "rewards_train/margins_1": 4.652117848396301, "rewards_train/margins_2": 3.727563202381134, "step": 583 }, { "epoch": 1.75, "learning_rate": 2.222038054975173e-07, "loss": 0.5444, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -190.819580078125, "logps_train/policy_1_l": -217.07553100585938, "logps_train/policy_1_w": -146.47897338867188, "logps_train/policy_2_2": -119.36134338378906, "logps_train/policy_2_w": -247.827880859375, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -185.0, "logps_train/ref_1_w": -187.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -1.568676471710205, "rewards_train/1-l": -3.1829442977905273, "rewards_train/1-w": 4.048196315765381, "rewards_train/2-2": 2.9044911861419678, "rewards_train/2-w": -2.7351319789886475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.231140613555908, "rewards_train/margins_1": 5.616872787475586, "rewards_train/margins_2": 5.639623165130615, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -208.33016967773438, "logps_train/policy_1_l": -170.96392822265625, "logps_train/policy_1_w": -114.29450988769531, "logps_train/policy_2_2": -134.833984375, "logps_train/policy_2_w": -184.04248046875, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -2.1990814208984375, "rewards_train/1-l": -2.5401430130004883, "rewards_train/1-w": 2.645939350128174, "rewards_train/2-2": 2.8439579010009766, "rewards_train/2-w": -1.8805668354034424, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.186082363128662, "rewards_train/margins_1": 4.845020771026611, "rewards_train/margins_2": 4.724524736404419, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -236.96377563476562, "logps_train/policy_1_l": -176.4793243408203, "logps_train/policy_1_w": -124.1899642944336, "logps_train/policy_2_2": -157.3056182861328, "logps_train/policy_2_w": -195.73597717285156, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -2.271768569946289, "rewards_train/1-l": -1.4848463535308838, "rewards_train/1-w": 3.0517067909240723, "rewards_train/2-2": 3.3381879329681396, "rewards_train/2-w": -1.7228165864944458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.536553144454956, "rewards_train/margins_1": 5.323475360870361, "rewards_train/margins_2": 5.0610045194625854, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -155.30552673339844, "logps_train/policy_1_l": -168.2891387939453, "logps_train/policy_1_w": -110.37721252441406, "logps_train/policy_2_2": -93.02348327636719, "logps_train/policy_2_w": -182.4068145751953, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -115.5, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -2.0402212142944336, "rewards_train/1-l": -2.319953441619873, "rewards_train/1-w": 2.6027090549468994, "rewards_train/2-2": 2.2234325408935547, "rewards_train/2-w": -1.9766199588775635, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.9226624965667725, "rewards_train/margins_1": 4.642930269241333, "rewards_train/margins_2": 4.200052499771118, "step": 584 }, { "epoch": 1.75, "logps_train/policy_1_2": -166.98388671875, "logps_train/policy_1_l": -134.70193481445312, "logps_train/policy_1_w": -106.71835327148438, "logps_train/policy_2_2": -98.19487762451172, "logps_train/policy_2_w": -190.5341796875, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -117.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.845653772354126, "rewards_train/1-l": -1.7725375890731812, "rewards_train/1-w": 3.364102363586426, "rewards_train/2-2": 2.7223095893859863, "rewards_train/2-w": -2.3054680824279785, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.136639952659607, "rewards_train/margins_1": 5.209756135940552, "rewards_train/margins_2": 5.027777671813965, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -248.85476684570312, "logps_train/policy_1_l": -235.11888122558594, "logps_train/policy_1_w": -138.44451904296875, "logps_train/policy_2_2": -160.1811981201172, "logps_train/policy_2_w": -206.5349884033203, "logps_train/ref_1_2": -225.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -2.4245386123657227, "rewards_train/1-l": -3.5484118461608887, "rewards_train/1-w": 2.9177560806274414, "rewards_train/2-2": 3.7818803787231445, "rewards_train/2-w": -1.2620936632156372, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.46616792678833, "rewards_train/margins_1": 5.342294692993164, "rewards_train/margins_2": 5.043974041938782, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -207.2781982421875, "logps_train/policy_1_l": -132.1939697265625, "logps_train/policy_1_w": -120.48464965820312, "logps_train/policy_2_2": -145.86276245117188, "logps_train/policy_2_w": -186.44874572753906, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -108.5, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -171.0, "rewards_train/1-2": -1.909460186958313, "rewards_train/1-l": -2.351330280303955, "rewards_train/1-w": 2.773020029067993, "rewards_train/2-2": 2.1873574256896973, "rewards_train/2-w": -1.5417498350143433, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.124350309371948, "rewards_train/margins_1": 4.682480216026306, "rewards_train/margins_2": 3.7291072607040405, "step": 585 }, { "epoch": 1.75, "logps_train/policy_1_2": -185.11663818359375, "logps_train/policy_1_l": -177.40997314453125, "logps_train/policy_1_w": -133.31561279296875, "logps_train/policy_2_2": -118.95551300048828, "logps_train/policy_2_w": -214.3744354248047, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -2.0495550632476807, "rewards_train/1-l": -2.760528326034546, "rewards_train/1-w": 3.009551763534546, "rewards_train/2-2": 2.372711181640625, "rewards_train/2-w": -2.734708786010742, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.770080089569092, "rewards_train/margins_1": 5.059106826782227, "rewards_train/margins_2": 5.107419967651367, "step": 585 }, { "epoch": 1.75, "learning_rate": 2.1213595689245386e-07, "loss": 0.5083, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -163.24717712402344, "logps_train/policy_1_l": -160.317138671875, "logps_train/policy_1_w": -116.63365173339844, "logps_train/policy_2_2": -100.59317016601562, "logps_train/policy_2_w": -185.62777709960938, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.43116295337677, "rewards_train/1-l": -2.5415782928466797, "rewards_train/1-w": 2.873744249343872, "rewards_train/2-2": 2.687558174133301, "rewards_train/2-w": -2.017465591430664, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.415322542190552, "rewards_train/margins_1": 4.304907202720642, "rewards_train/margins_2": 4.705023765563965, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -169.73297119140625, "logps_train/policy_1_l": -158.39077758789062, "logps_train/policy_1_w": -90.25161743164062, "logps_train/policy_2_2": -104.85343933105469, "logps_train/policy_2_w": -149.17381286621094, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -114.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -138.0, "rewards_train/1-2": -1.5384328365325928, "rewards_train/1-l": -1.8231594562530518, "rewards_train/1-w": 2.363607406616211, "rewards_train/2-2": 2.7687582969665527, "rewards_train/2-w": -1.171678066253662, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.186766862869263, "rewards_train/margins_1": 3.9020402431488037, "rewards_train/margins_2": 3.940436363220215, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -100.70244598388672, "logps_train/policy_1_l": -126.57849884033203, "logps_train/policy_1_w": -78.70225524902344, "logps_train/policy_2_2": -69.67818450927734, "logps_train/policy_2_w": -118.2012939453125, "logps_train/ref_1_2": -96.5, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -87.5, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": -0.4065728783607483, "rewards_train/1-l": -2.0709352493286133, "rewards_train/1-w": 2.4651265144348145, "rewards_train/2-2": 1.7755404710769653, "rewards_train/2-w": -0.347278356552124, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 4.536061763763428, "rewards_train/margins_1": 2.8716993927955627, "rewards_train/margins_2": 2.1228188276290894, "step": 586 }, { "epoch": 1.75, "logps_train/policy_1_2": -248.29701232910156, "logps_train/policy_1_l": -219.0374298095703, "logps_train/policy_1_w": -161.07403564453125, "logps_train/policy_2_2": -150.40219116210938, "logps_train/policy_2_w": -254.2899169921875, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": -2.9859519004821777, "rewards_train/1-l": -2.7392897605895996, "rewards_train/1-w": 3.432391881942749, "rewards_train/2-2": 3.5800931453704834, "rewards_train/2-w": -2.1727428436279297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.171681642532349, "rewards_train/margins_1": 6.418343782424927, "rewards_train/margins_2": 5.752835988998413, "step": 586 }, { "epoch": 1.76, "logps_train/policy_1_2": -183.5569305419922, "logps_train/policy_1_l": -185.87899780273438, "logps_train/policy_1_w": -141.52447509765625, "logps_train/policy_2_2": -114.56828308105469, "logps_train/policy_2_w": -220.13607788085938, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.1814746856689453, "rewards_train/1-l": -2.656649351119995, "rewards_train/1-w": 3.413372039794922, "rewards_train/2-2": 3.2005934715270996, "rewards_train/2-w": -1.5237644910812378, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.070021390914917, "rewards_train/margins_1": 4.594846725463867, "rewards_train/margins_2": 4.724357962608337, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -270.3526916503906, "logps_train/policy_1_l": -177.09539794921875, "logps_train/policy_1_w": -142.39389038085938, "logps_train/policy_2_2": -181.45632934570312, "logps_train/policy_2_w": -224.318115234375, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -218.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.7837055921554565, "rewards_train/1-l": -1.8118842840194702, "rewards_train/1-w": 3.361001491546631, "rewards_train/2-2": 3.68874192237854, "rewards_train/2-w": -1.9208738803863525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.172885775566101, "rewards_train/margins_1": 5.144707083702087, "rewards_train/margins_2": 5.609615802764893, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -199.3950653076172, "logps_train/policy_1_l": -159.5376434326172, "logps_train/policy_1_w": -99.1546630859375, "logps_train/policy_2_2": -127.43167877197266, "logps_train/policy_2_w": -149.7792510986328, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -2.3967320919036865, "rewards_train/1-l": -2.2148475646972656, "rewards_train/1-w": 2.70269775390625, "rewards_train/2-2": 2.795504331588745, "rewards_train/2-w": -0.7998000979423523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.917545318603516, "rewards_train/margins_1": 5.0994298458099365, "rewards_train/margins_2": 3.5953044295310974, "step": 587 }, { "epoch": 1.76, "logps_train/policy_1_2": -144.78497314453125, "logps_train/policy_1_l": -163.53668212890625, "logps_train/policy_1_w": -96.44050598144531, "logps_train/policy_2_2": -86.41166687011719, "logps_train/policy_2_w": -149.96206665039062, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.2202928066253662, "rewards_train/1-l": -2.592926502227783, "rewards_train/1-w": 2.7522382736206055, "rewards_train/2-2": 2.7865684032440186, "rewards_train/2-w": -0.9363436698913574, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.345164775848389, "rewards_train/margins_1": 3.9725310802459717, "rewards_train/margins_2": 3.722912073135376, "step": 587 }, { "epoch": 1.76, "learning_rate": 2.022913997847417e-07, "loss": 0.5543, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -196.62123107910156, "logps_train/policy_1_l": -201.04100036621094, "logps_train/policy_1_w": -123.056640625, "logps_train/policy_2_2": -132.04421997070312, "logps_train/policy_2_w": -193.82431030273438, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.2004051208496094, "rewards_train/1-l": -2.170506238937378, "rewards_train/1-w": 3.275390148162842, "rewards_train/2-2": 3.2467498779296875, "rewards_train/2-w": -0.9949304461479187, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.44589638710022, "rewards_train/margins_1": 4.475795269012451, "rewards_train/margins_2": 4.241680324077606, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -143.14881896972656, "logps_train/policy_1_l": -179.82174682617188, "logps_train/policy_1_w": -109.22142791748047, "logps_train/policy_2_2": -94.62130737304688, "logps_train/policy_2_w": -164.0552978515625, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -155.0, "rewards_train/1-2": -0.6000384092330933, "rewards_train/1-l": -2.6679654121398926, "rewards_train/1-w": 3.3185977935791016, "rewards_train/2-2": 2.595290422439575, "rewards_train/2-w": -0.9239630699157715, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.986563205718994, "rewards_train/margins_1": 3.918636202812195, "rewards_train/margins_2": 3.5192534923553467, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -174.8986358642578, "logps_train/policy_1_l": -162.58534240722656, "logps_train/policy_1_w": -127.86436462402344, "logps_train/policy_2_2": -97.82969665527344, "logps_train/policy_2_w": -192.28404235839844, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.9773640632629395, "rewards_train/1-l": -3.1788344383239746, "rewards_train/1-w": 2.9571194648742676, "rewards_train/2-2": 3.049452304840088, "rewards_train/2-w": -1.2948105335235596, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.135953903198242, "rewards_train/margins_1": 4.934483528137207, "rewards_train/margins_2": 4.3442628383636475, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -212.56663513183594, "logps_train/policy_1_l": -208.25558471679688, "logps_train/policy_1_w": -123.67257690429688, "logps_train/policy_2_2": -133.9621124267578, "logps_train/policy_2_w": -200.005126953125, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -1.9687721729278564, "rewards_train/1-l": -3.2501673698425293, "rewards_train/1-w": 2.958767890930176, "rewards_train/2-2": 3.1881637573242188, "rewards_train/2-w": -1.893090009689331, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.208935260772705, "rewards_train/margins_1": 4.927540063858032, "rewards_train/margins_2": 5.08125376701355, "step": 588 }, { "epoch": 1.76, "logps_train/policy_1_2": -87.82717895507812, "logps_train/policy_1_l": -96.46929931640625, "logps_train/policy_1_w": -96.00474548339844, "logps_train/policy_2_2": -58.79024124145508, "logps_train/policy_2_w": -153.71310424804688, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -78.5, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -0.05576410889625549, "rewards_train/1-l": -1.8225159645080566, "rewards_train/1-w": 2.731947422027588, "rewards_train/2-2": 1.8098430633544922, "rewards_train/2-w": -1.6394754648208618, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.5544633865356445, "rewards_train/margins_1": 2.7877115309238434, "rewards_train/margins_2": 3.449318528175354, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -165.4324951171875, "logps_train/policy_1_l": -170.9263458251953, "logps_train/policy_1_w": -122.59124755859375, "logps_train/policy_2_2": -107.23625183105469, "logps_train/policy_2_w": -196.21795654296875, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.430945873260498, "rewards_train/1-l": -2.0763020515441895, "rewards_train/1-w": 2.9569883346557617, "rewards_train/2-2": 2.761530876159668, "rewards_train/2-w": -1.5827336311340332, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.033290386199951, "rewards_train/margins_1": 4.38793420791626, "rewards_train/margins_2": 4.344264507293701, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -198.65896606445312, "logps_train/policy_1_l": -200.82965087890625, "logps_train/policy_1_w": -104.77367401123047, "logps_train/policy_2_2": -133.017333984375, "logps_train/policy_2_w": -160.47970581054688, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.254177212715149, "rewards_train/1-l": -3.4614806175231934, "rewards_train/1-w": 3.2126717567443848, "rewards_train/2-2": 3.3556883335113525, "rewards_train/2-w": -0.38390934467315674, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.674152374267578, "rewards_train/margins_1": 4.466848969459534, "rewards_train/margins_2": 3.7395976781845093, "step": 589 }, { "epoch": 1.76, "logps_train/policy_1_2": -192.662841796875, "logps_train/policy_1_l": -150.9544677734375, "logps_train/policy_1_w": -110.07766723632812, "logps_train/policy_2_2": -116.10676574707031, "logps_train/policy_2_w": -172.41368103027344, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.694408893585205, "rewards_train/1-l": -1.8159546852111816, "rewards_train/1-w": 2.818014621734619, "rewards_train/2-2": 2.8236987590789795, "rewards_train/2-w": -1.4015246629714966, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.633969306945801, "rewards_train/margins_1": 4.512423515319824, "rewards_train/margins_2": 4.225223422050476, "step": 589 }, { "epoch": 1.77, "learning_rate": 1.9267109498579962e-07, "loss": 0.5541, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -188.67352294921875, "logps_train/policy_1_l": -208.2071533203125, "logps_train/policy_1_w": -111.82791900634766, "logps_train/policy_2_2": -127.61459350585938, "logps_train/policy_2_w": -182.46739196777344, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -0.8465515375137329, "rewards_train/1-l": -3.215540647506714, "rewards_train/1-w": 3.441817283630371, "rewards_train/2-2": 3.257406711578369, "rewards_train/2-w": -1.065879464149475, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.657357931137085, "rewards_train/margins_1": 4.288368821144104, "rewards_train/margins_2": 4.323286175727844, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -191.4325408935547, "logps_train/policy_1_l": -220.07936096191406, "logps_train/policy_1_w": -126.47879791259766, "logps_train/policy_2_2": -127.2051010131836, "logps_train/policy_2_w": -201.8662109375, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -0.7639579772949219, "rewards_train/1-l": -3.459498167037964, "rewards_train/1-w": 3.5044641494750977, "rewards_train/2-2": 3.3078103065490723, "rewards_train/2-w": -1.6217777729034424, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.9639623165130615, "rewards_train/margins_1": 4.2684221267700195, "rewards_train/margins_2": 4.929588079452515, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -163.27093505859375, "logps_train/policy_1_l": -178.50535583496094, "logps_train/policy_1_w": -104.07914733886719, "logps_train/policy_2_2": -106.74163818359375, "logps_train/policy_2_w": -174.68988037109375, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.2235777378082275, "rewards_train/1-l": -2.219236373901367, "rewards_train/1-w": 2.7598588466644287, "rewards_train/2-2": 2.5234925746917725, "rewards_train/2-w": -1.4693797826766968, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.979095220565796, "rewards_train/margins_1": 3.9834365844726562, "rewards_train/margins_2": 3.9928723573684692, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -229.7720184326172, "logps_train/policy_1_l": -197.56231689453125, "logps_train/policy_1_w": -157.56002807617188, "logps_train/policy_2_2": -157.12408447265625, "logps_train/policy_2_w": -239.25112915039062, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -1.204545259475708, "rewards_train/1-l": -2.368112564086914, "rewards_train/1-w": 4.379087924957275, "rewards_train/2-2": 3.4563422203063965, "rewards_train/2-w": -1.0298738479614258, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.7472004890441895, "rewards_train/margins_1": 5.583633184432983, "rewards_train/margins_2": 4.486216068267822, "step": 590 }, { "epoch": 1.77, "logps_train/policy_1_2": -161.47708129882812, "logps_train/policy_1_l": -169.0113983154297, "logps_train/policy_1_w": -164.56613159179688, "logps_train/policy_2_2": -113.97654724121094, "logps_train/policy_2_w": -254.537841796875, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -232.0, "rewards_train/1-2": -0.6590360999107361, "rewards_train/1-l": -1.931290626525879, "rewards_train/1-w": 3.7790307998657227, "rewards_train/2-2": 2.2671899795532227, "rewards_train/2-w": -2.25070858001709, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.710321426391602, "rewards_train/margins_1": 4.438066899776459, "rewards_train/margins_2": 4.5178985595703125, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -200.01356506347656, "logps_train/policy_1_l": -131.12594604492188, "logps_train/policy_1_w": -130.51522827148438, "logps_train/policy_2_2": -136.76258850097656, "logps_train/policy_2_w": -200.85565185546875, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.3357305526733398, "rewards_train/1-l": -1.73671555519104, "rewards_train/1-w": 3.1221108436584473, "rewards_train/2-2": 2.9143664836883545, "rewards_train/2-w": -1.704706072807312, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.858826398849487, "rewards_train/margins_1": 4.457841396331787, "rewards_train/margins_2": 4.6190725564956665, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -227.37545776367188, "logps_train/policy_1_l": -239.78106689453125, "logps_train/policy_1_w": -158.23300170898438, "logps_train/policy_2_2": -144.78074645996094, "logps_train/policy_2_w": -263.80511474609375, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": -2.165280818939209, "rewards_train/1-l": -3.191783905029297, "rewards_train/1-w": 4.200137615203857, "rewards_train/2-2": 3.072120428085327, "rewards_train/2-w": -3.0453543663024902, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.391921520233154, "rewards_train/margins_1": 6.365418434143066, "rewards_train/margins_2": 6.117474794387817, "step": 591 }, { "epoch": 1.77, "logps_train/policy_1_2": -192.4435577392578, "logps_train/policy_1_l": -182.42237854003906, "logps_train/policy_1_w": -130.33473205566406, "logps_train/policy_2_2": -127.20552825927734, "logps_train/policy_2_w": -194.73867797851562, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -0.31974634528160095, "rewards_train/1-l": -2.306471347808838, "rewards_train/1-w": 3.277073860168457, "rewards_train/2-2": 3.7860875129699707, "rewards_train/2-w": -0.7720127701759338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.583545207977295, "rewards_train/margins_1": 3.596820205450058, "rewards_train/margins_2": 4.5581002831459045, "step": 591 }, { "epoch": 1.77, "learning_rate": 1.832759814204166e-07, "loss": 0.5785, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -192.2357177734375, "logps_train/policy_1_l": -135.26406860351562, "logps_train/policy_1_w": -114.14550018310547, "logps_train/policy_2_2": -109.55152893066406, "logps_train/policy_2_w": -194.82565307617188, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -2.3479866981506348, "rewards_train/1-l": -1.903433084487915, "rewards_train/1-w": 3.1420907974243164, "rewards_train/2-2": 3.180589199066162, "rewards_train/2-w": -2.4266090393066406, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.0455238819122314, "rewards_train/margins_1": 5.490077495574951, "rewards_train/margins_2": 5.607198238372803, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -222.4407958984375, "logps_train/policy_1_l": -230.35928344726562, "logps_train/policy_1_w": -136.07855224609375, "logps_train/policy_2_2": -150.6224365234375, "logps_train/policy_2_w": -202.49066162109375, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -182.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -1.2782597541809082, "rewards_train/1-l": -3.002725124359131, "rewards_train/1-w": 3.0201711654663086, "rewards_train/2-2": 3.1238880157470703, "rewards_train/2-w": -1.366692066192627, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.0228962898254395, "rewards_train/margins_1": 4.298430919647217, "rewards_train/margins_2": 4.490580081939697, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -219.21414184570312, "logps_train/policy_1_l": -192.00918579101562, "logps_train/policy_1_w": -128.2117156982422, "logps_train/policy_2_2": -126.70948028564453, "logps_train/policy_2_w": -213.28781127929688, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -2.2811779975891113, "rewards_train/1-l": -3.2028708457946777, "rewards_train/1-w": 3.3376173973083496, "rewards_train/2-2": 3.2892088890075684, "rewards_train/2-w": -2.0303425788879395, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.540488243103027, "rewards_train/margins_1": 5.618795394897461, "rewards_train/margins_2": 5.319551467895508, "step": 592 }, { "epoch": 1.77, "logps_train/policy_1_2": -149.53463745117188, "logps_train/policy_1_l": -125.7225112915039, "logps_train/policy_1_w": -110.43997955322266, "logps_train/policy_2_2": -91.34886169433594, "logps_train/policy_2_w": -176.34161376953125, "logps_train/ref_1_2": -135.0, "logps_train/ref_1_l": -107.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -117.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.4387190341949463, "rewards_train/1-l": -1.860825538635254, "rewards_train/1-w": 2.932565212249756, "rewards_train/2-2": 2.5471208095550537, "rewards_train/2-w": -1.5599431991577148, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.79339075088501, "rewards_train/margins_1": 4.371284246444702, "rewards_train/margins_2": 4.1070640087127686, "step": 592 }, { "epoch": 1.78, "logps_train/policy_1_2": -133.0141143798828, "logps_train/policy_1_l": -118.29730987548828, "logps_train/policy_1_w": -83.21011352539062, "logps_train/policy_2_2": -78.94685363769531, "logps_train/policy_2_w": -159.92709350585938, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -109.5, "logps_train/ref_2_2": -98.5, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -1.4787548780441284, "rewards_train/1-l": -1.8754289150238037, "rewards_train/1-w": 2.63094162940979, "rewards_train/2-2": 1.9486737251281738, "rewards_train/2-w": -2.1136083602905273, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.506370544433594, "rewards_train/margins_1": 4.1096965074539185, "rewards_train/margins_2": 4.062282085418701, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -154.866943359375, "logps_train/policy_1_l": -123.89140319824219, "logps_train/policy_1_w": -88.09149169921875, "logps_train/policy_2_2": -92.55792236328125, "logps_train/policy_2_w": -140.3313446044922, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -118.5, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -1.7249770164489746, "rewards_train/1-l": -2.3854289054870605, "rewards_train/1-w": 2.2247369289398193, "rewards_train/2-2": 2.598700523376465, "rewards_train/2-w": -1.2726845741271973, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.61016583442688, "rewards_train/margins_1": 3.949713945388794, "rewards_train/margins_2": 3.871385097503662, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -149.94680786132812, "logps_train/policy_1_l": -130.09881591796875, "logps_train/policy_1_w": -123.30033874511719, "logps_train/policy_2_2": -96.3664779663086, "logps_train/policy_2_w": -195.6759033203125, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -1.098585844039917, "rewards_train/1-l": -1.8516781330108643, "rewards_train/1-w": 3.1371538639068604, "rewards_train/2-2": 2.3342018127441406, "rewards_train/2-w": -1.4839975833892822, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.988831996917725, "rewards_train/margins_1": 4.235739707946777, "rewards_train/margins_2": 3.818199396133423, "step": 593 }, { "epoch": 1.78, "logps_train/policy_1_2": -211.92388916015625, "logps_train/policy_1_l": -160.41290283203125, "logps_train/policy_1_w": -133.32403564453125, "logps_train/policy_2_2": -143.81448364257812, "logps_train/policy_2_w": -203.80250549316406, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.512701392173767, "rewards_train/1-l": -2.0364556312561035, "rewards_train/1-w": 3.3382983207702637, "rewards_train/2-2": 3.036763906478882, "rewards_train/2-w": -1.3642354011535645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.374753952026367, "rewards_train/margins_1": 4.850999712944031, "rewards_train/margins_2": 4.400999307632446, "step": 593 }, { "epoch": 1.78, "learning_rate": 1.7410697603511383e-07, "loss": 0.5424, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -178.4566650390625, "logps_train/policy_1_l": -261.269775390625, "logps_train/policy_1_w": -131.5015869140625, "logps_train/policy_2_2": -115.86328125, "logps_train/policy_2_w": -214.7389678955078, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -223.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.3759891986846924, "rewards_train/1-l": -3.831711769104004, "rewards_train/1-w": 3.3857779502868652, "rewards_train/2-2": 2.8116202354431152, "rewards_train/2-w": -1.7645223140716553, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.217489719390869, "rewards_train/margins_1": 4.761767148971558, "rewards_train/margins_2": 4.5761425495147705, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -175.97503662109375, "logps_train/policy_1_l": -163.93960571289062, "logps_train/policy_1_w": -83.42516326904297, "logps_train/policy_2_2": -111.56957244873047, "logps_train/policy_2_w": -133.41131591796875, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -118.5, "rewards_train/1-2": -1.7225043773651123, "rewards_train/1-l": -2.5574870109558105, "rewards_train/1-w": 2.288147449493408, "rewards_train/2-2": 2.696558952331543, "rewards_train/2-w": -1.4981629848480225, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.845634460449219, "rewards_train/margins_1": 4.0106518268585205, "rewards_train/margins_2": 4.194721937179565, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -150.7342529296875, "logps_train/policy_1_l": -189.64256286621094, "logps_train/policy_1_w": -142.19656372070312, "logps_train/policy_2_2": -93.43617248535156, "logps_train/policy_2_w": -209.87367248535156, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -176.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -1.084363579750061, "rewards_train/1-l": -2.6529276371002197, "rewards_train/1-w": 3.3359100818634033, "rewards_train/2-2": 2.637144088745117, "rewards_train/2-w": -1.5361952781677246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.988837718963623, "rewards_train/margins_1": 4.420273661613464, "rewards_train/margins_2": 4.173339366912842, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -175.92039489746094, "logps_train/policy_1_l": -149.82437133789062, "logps_train/policy_1_w": -100.15351867675781, "logps_train/policy_2_2": -109.04698944091797, "logps_train/policy_2_w": -156.20501708984375, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -2.075634002685547, "rewards_train/1-l": -2.029409170150757, "rewards_train/1-w": 2.3178510665893555, "rewards_train/2-2": 2.247253894805908, "rewards_train/2-w": -1.193158745765686, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.347260236740112, "rewards_train/margins_1": 4.393485069274902, "rewards_train/margins_2": 3.4404126405715942, "step": 594 }, { "epoch": 1.78, "logps_train/policy_1_2": -206.3986358642578, "logps_train/policy_1_l": -163.44418334960938, "logps_train/policy_1_w": -142.099365234375, "logps_train/policy_2_2": -134.36154174804688, "logps_train/policy_2_w": -216.02432250976562, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.3453325033187866, "rewards_train/1-l": -1.5398050546646118, "rewards_train/1-w": 3.252758502960205, "rewards_train/2-2": 3.3349409103393555, "rewards_train/2-w": -1.6000880002975464, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.792563557624817, "rewards_train/margins_1": 4.598091006278992, "rewards_train/margins_2": 4.935028910636902, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -149.48428344726562, "logps_train/policy_1_l": -122.06617736816406, "logps_train/policy_1_w": -107.23535919189453, "logps_train/policy_2_2": -95.1864013671875, "logps_train/policy_2_w": -167.76031494140625, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -105.5, "logps_train/ref_1_w": -139.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.9544824957847595, "rewards_train/1-l": -1.6627700328826904, "rewards_train/1-w": 3.13466739654541, "rewards_train/2-2": 2.760071277618408, "rewards_train/2-w": -1.0713447332382202, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.797437429428101, "rewards_train/margins_1": 4.08914989233017, "rewards_train/margins_2": 3.8314160108566284, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -139.01499938964844, "logps_train/policy_1_l": -137.3140869140625, "logps_train/policy_1_w": -99.54063415527344, "logps_train/policy_2_2": -91.55867004394531, "logps_train/policy_2_w": -163.05947875976562, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -114.5, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -0.6366565227508545, "rewards_train/1-l": -2.1101691722869873, "rewards_train/1-w": 3.0396859645843506, "rewards_train/2-2": 2.3019461631774902, "rewards_train/2-w": -1.688759446144104, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.149855136871338, "rewards_train/margins_1": 3.676342487335205, "rewards_train/margins_2": 3.9907056093215942, "step": 595 }, { "epoch": 1.78, "logps_train/policy_1_2": -162.61105346679688, "logps_train/policy_1_l": -196.82821655273438, "logps_train/policy_1_w": -105.34075164794922, "logps_train/policy_2_2": -110.93021392822266, "logps_train/policy_2_w": -162.09478759765625, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -174.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -0.9196984767913818, "rewards_train/1-l": -2.2717370986938477, "rewards_train/1-w": 2.4564037322998047, "rewards_train/2-2": 2.44096302986145, "rewards_train/2-w": -0.9184643030166626, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.728140830993652, "rewards_train/margins_1": 3.3761022090911865, "rewards_train/margins_2": 3.359427332878113, "step": 595 }, { "epoch": 1.78, "learning_rate": 1.651649737086533e-07, "loss": 0.505, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -241.1380615234375, "logps_train/policy_1_l": -191.16336059570312, "logps_train/policy_1_w": -138.97589111328125, "logps_train/policy_2_2": -156.17095947265625, "logps_train/policy_2_w": -216.0889129638672, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.764587163925171, "rewards_train/1-l": -2.3649685382843018, "rewards_train/1-w": 3.1206722259521484, "rewards_train/2-2": 3.8541927337646484, "rewards_train/2-w": -1.6486374139785767, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.48564076423645, "rewards_train/margins_1": 4.885259389877319, "rewards_train/margins_2": 5.502830147743225, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -205.51800537109375, "logps_train/policy_1_l": -204.38433837890625, "logps_train/policy_1_w": -125.26871490478516, "logps_train/policy_2_2": -143.72412109375, "logps_train/policy_2_w": -188.80081176757812, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.1014107465744019, "rewards_train/1-l": -2.4537644386291504, "rewards_train/1-w": 3.2674641609191895, "rewards_train/2-2": 3.047412395477295, "rewards_train/2-w": -0.9828159809112549, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.72122859954834, "rewards_train/margins_1": 4.368874907493591, "rewards_train/margins_2": 4.03022837638855, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -160.25308227539062, "logps_train/policy_1_l": -160.68484497070312, "logps_train/policy_1_w": -122.32072448730469, "logps_train/policy_2_2": -95.20967102050781, "logps_train/policy_2_w": -196.77691650390625, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.7241610288619995, "rewards_train/1-l": -2.01330828666687, "rewards_train/1-w": 3.108161687850952, "rewards_train/2-2": 2.6746137142181396, "rewards_train/2-w": -1.809723138809204, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.121469974517822, "rewards_train/margins_1": 4.832322716712952, "rewards_train/margins_2": 4.484336853027344, "step": 596 }, { "epoch": 1.78, "logps_train/policy_1_2": -163.4464111328125, "logps_train/policy_1_l": -190.33596801757812, "logps_train/policy_1_w": -113.83535766601562, "logps_train/policy_2_2": -98.91127014160156, "logps_train/policy_2_w": -194.2609100341797, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.5950310230255127, "rewards_train/1-l": -2.5505640506744385, "rewards_train/1-w": 2.8912692070007324, "rewards_train/2-2": 2.6317248344421387, "rewards_train/2-w": -2.4622230529785156, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.441833257675171, "rewards_train/margins_1": 4.486300230026245, "rewards_train/margins_2": 5.093947887420654, "step": 596 }, { "epoch": 1.79, "logps_train/policy_1_2": -261.1956787109375, "logps_train/policy_1_l": -188.37646484375, "logps_train/policy_1_w": -155.98797607421875, "logps_train/policy_2_2": -184.1923065185547, "logps_train/policy_2_w": -227.6632537841797, "logps_train/ref_1_2": -245.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -220.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.5887064933776855, "rewards_train/1-l": -2.171241044998169, "rewards_train/1-w": 3.4802069664001465, "rewards_train/2-2": 3.579401969909668, "rewards_train/2-w": -1.0152509212493896, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.651448011398315, "rewards_train/margins_1": 5.068913459777832, "rewards_train/margins_2": 4.594652891159058, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -191.78173828125, "logps_train/policy_1_l": -239.17242431640625, "logps_train/policy_1_w": -131.6988983154297, "logps_train/policy_2_2": -120.48551940917969, "logps_train/policy_2_w": -212.54058837890625, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -218.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.3107916116714478, "rewards_train/1-l": -2.1347241401672363, "rewards_train/1-w": 3.4430007934570312, "rewards_train/2-2": 3.194807529449463, "rewards_train/2-w": -1.2952708005905151, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.577724933624268, "rewards_train/margins_1": 4.753792405128479, "rewards_train/margins_2": 4.490078330039978, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -214.3084716796875, "logps_train/policy_1_l": -205.0455780029297, "logps_train/policy_1_w": -149.9697723388672, "logps_train/policy_2_2": -144.1993865966797, "logps_train/policy_2_w": -232.47955322265625, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -1.7140493392944336, "rewards_train/1-l": -2.7016286849975586, "rewards_train/1-w": 3.660102367401123, "rewards_train/2-2": 3.0456862449645996, "rewards_train/2-w": -1.7979562282562256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.361731052398682, "rewards_train/margins_1": 5.374151706695557, "rewards_train/margins_2": 4.843642473220825, "step": 597 }, { "epoch": 1.79, "logps_train/policy_1_2": -192.64129638671875, "logps_train/policy_1_l": -166.59146118164062, "logps_train/policy_1_w": -132.54672241210938, "logps_train/policy_2_2": -114.66846466064453, "logps_train/policy_2_w": -213.99334716796875, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -2.050457239151001, "rewards_train/1-l": -2.097622871398926, "rewards_train/1-w": 3.0367345809936523, "rewards_train/2-2": 2.8827624320983887, "rewards_train/2-w": -2.13761568069458, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.134357452392578, "rewards_train/margins_1": 5.087191820144653, "rewards_train/margins_2": 5.020378112792969, "step": 597 }, { "epoch": 1.79, "learning_rate": 1.5645084716469778e-07, "loss": 0.4984, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -135.96087646484375, "logps_train/policy_1_l": -133.73880004882812, "logps_train/policy_1_w": -79.04054260253906, "logps_train/policy_2_2": -73.25502014160156, "logps_train/policy_2_w": -150.45413208007812, "logps_train/ref_1_2": -115.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -128.0, "rewards_train/1-2": -2.09682035446167, "rewards_train/1-l": -2.9914090633392334, "rewards_train/1-w": 2.6365714073181152, "rewards_train/2-2": 1.9638292789459229, "rewards_train/2-w": -2.180959701538086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.627980470657349, "rewards_train/margins_1": 4.733391761779785, "rewards_train/margins_2": 4.144788980484009, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -230.75010681152344, "logps_train/policy_1_l": -183.86386108398438, "logps_train/policy_1_w": -152.13955688476562, "logps_train/policy_2_2": -152.33984375, "logps_train/policy_2_w": -226.98184204101562, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -191.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -1.5961047410964966, "rewards_train/1-l": -2.184896945953369, "rewards_train/1-w": 3.643612861633301, "rewards_train/2-2": 3.849609375, "rewards_train/2-w": -1.3692771196365356, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.82850980758667, "rewards_train/margins_1": 5.239717602729797, "rewards_train/margins_2": 5.218886494636536, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -207.30227661132812, "logps_train/policy_1_l": -175.78753662109375, "logps_train/policy_1_w": -138.39694213867188, "logps_train/policy_2_2": -141.21380615234375, "logps_train/policy_2_w": -211.17376708984375, "logps_train/ref_1_2": -196.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -174.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.116164207458496, "rewards_train/1-l": -1.748772382736206, "rewards_train/1-w": 3.6538608074188232, "rewards_train/2-2": 3.2590885162353516, "rewards_train/2-w": -1.1542894840240479, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.402633190155029, "rewards_train/margins_1": 4.770025014877319, "rewards_train/margins_2": 4.413378000259399, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -180.64700317382812, "logps_train/policy_1_l": -172.470458984375, "logps_train/policy_1_w": -111.85611724853516, "logps_train/policy_2_2": -121.40918731689453, "logps_train/policy_2_w": -172.82290649414062, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.102201223373413, "rewards_train/1-l": -2.576098680496216, "rewards_train/1-w": 3.046419143676758, "rewards_train/2-2": 2.8239245414733887, "rewards_train/2-w": -1.0561177730560303, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.622517824172974, "rewards_train/margins_1": 4.148620367050171, "rewards_train/margins_2": 3.880042314529419, "step": 598 }, { "epoch": 1.79, "logps_train/policy_1_2": -177.60572814941406, "logps_train/policy_1_l": -195.69874572753906, "logps_train/policy_1_w": -123.74436950683594, "logps_train/policy_2_2": -99.3339614868164, "logps_train/policy_2_w": -221.94407653808594, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -125.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -2.4453377723693848, "rewards_train/1-l": -3.0838890075683594, "rewards_train/1-w": 3.2767345905303955, "rewards_train/2-2": 2.5740256309509277, "rewards_train/2-w": -2.8444082736968994, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.360623598098755, "rewards_train/margins_1": 5.72207236289978, "rewards_train/margins_2": 5.418433904647827, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -174.8103485107422, "logps_train/policy_1_l": -149.01321411132812, "logps_train/policy_1_w": -83.9646987915039, "logps_train/policy_2_2": -115.1458511352539, "logps_train/policy_2_w": -142.24575805664062, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -1.6070119142532349, "rewards_train/1-l": -2.058450222015381, "rewards_train/1-w": 2.3509912490844727, "rewards_train/2-2": 2.4264307022094727, "rewards_train/2-w": -1.6343402862548828, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.4094414710998535, "rewards_train/margins_1": 3.9580031633377075, "rewards_train/margins_2": 4.0607709884643555, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -169.60128784179688, "logps_train/policy_1_l": -203.18984985351562, "logps_train/policy_1_w": -93.76029205322266, "logps_train/policy_2_2": -113.1141357421875, "logps_train/policy_2_w": -147.15762329101562, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -1.61989426612854, "rewards_train/1-l": -3.0965723991394043, "rewards_train/1-w": 2.4237756729125977, "rewards_train/2-2": 2.3583128452301025, "rewards_train/2-w": -0.9643946886062622, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.520348072052002, "rewards_train/margins_1": 4.043669939041138, "rewards_train/margins_2": 3.3227075338363647, "step": 599 }, { "epoch": 1.79, "logps_train/policy_1_2": -185.6155548095703, "logps_train/policy_1_l": -175.64511108398438, "logps_train/policy_1_w": -130.86793518066406, "logps_train/policy_2_2": -125.32908630371094, "logps_train/policy_2_w": -208.98069763183594, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -1.226986050605774, "rewards_train/1-l": -2.4413657188415527, "rewards_train/1-w": 3.3901593685150146, "rewards_train/2-2": 2.7226827144622803, "rewards_train/2-w": -1.6625232696533203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.831525087356567, "rewards_train/margins_1": 4.617145419120789, "rewards_train/margins_2": 4.385205984115601, "step": 599 }, { "epoch": 1.8, "learning_rate": 1.4796544688663623e-07, "loss": 0.5829, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -161.28106689453125, "logps_train/policy_1_l": -166.38084411621094, "logps_train/policy_1_w": -137.08377075195312, "logps_train/policy_2_2": -108.60008239746094, "logps_train/policy_2_w": -228.3113250732422, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -0.8253717422485352, "rewards_train/1-l": -2.147068738937378, "rewards_train/1-w": 3.4154505729675293, "rewards_train/2-2": 2.3581557273864746, "rewards_train/2-w": -2.1487112045288086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.562519311904907, "rewards_train/margins_1": 4.2408223152160645, "rewards_train/margins_2": 4.506866931915283, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -175.70639038085938, "logps_train/policy_1_l": -131.09361267089844, "logps_train/policy_1_w": -94.43333435058594, "logps_train/policy_2_2": -104.33861541748047, "logps_train/policy_2_w": -161.67909240722656, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.5374358892440796, "rewards_train/1-l": -1.873081922531128, "rewards_train/1-w": 2.9344985485076904, "rewards_train/2-2": 2.893482208251953, "rewards_train/2-w": -1.5794317722320557, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.807580471038818, "rewards_train/margins_1": 4.47193443775177, "rewards_train/margins_2": 4.472913980484009, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -214.37271118164062, "logps_train/policy_1_l": -171.78213500976562, "logps_train/policy_1_w": -120.62772369384766, "logps_train/policy_2_2": -125.67862701416016, "logps_train/policy_2_w": -199.32774353027344, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -3.0599284172058105, "rewards_train/1-l": -2.3115148544311523, "rewards_train/1-w": 3.0899622440338135, "rewards_train/2-2": 3.024324893951416, "rewards_train/2-w": -2.286680221557617, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.401477098464966, "rewards_train/margins_1": 6.149890661239624, "rewards_train/margins_2": 5.311005115509033, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -179.9112548828125, "logps_train/policy_1_l": -159.3093719482422, "logps_train/policy_1_w": -110.37425994873047, "logps_train/policy_2_2": -113.48649597167969, "logps_train/policy_2_w": -172.19757080078125, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -1.726671814918518, "rewards_train/1-l": -1.6986134052276611, "rewards_train/1-w": 2.2478764057159424, "rewards_train/2-2": 2.5907058715820312, "rewards_train/2-w": -1.909503698348999, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.9464898109436035, "rewards_train/margins_1": 3.9745482206344604, "rewards_train/margins_2": 4.50020956993103, "step": 600 }, { "epoch": 1.8, "logps_train/policy_1_2": -208.99082946777344, "logps_train/policy_1_l": -177.90187072753906, "logps_train/policy_1_w": -125.73360443115234, "logps_train/policy_2_2": -143.70237731933594, "logps_train/policy_2_w": -195.30056762695312, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.5834574699401855, "rewards_train/1-l": -2.2119643688201904, "rewards_train/1-w": 2.737576961517334, "rewards_train/2-2": 2.805152654647827, "rewards_train/2-w": -1.3613076210021973, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.949541330337524, "rewards_train/margins_1": 4.3210344314575195, "rewards_train/margins_2": 4.166460275650024, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -191.4158172607422, "logps_train/policy_1_l": -156.2602081298828, "logps_train/policy_1_w": -101.47391510009766, "logps_train/policy_2_2": -112.87413787841797, "logps_train/policy_2_w": -161.36354064941406, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -2.514237403869629, "rewards_train/1-l": -2.751802921295166, "rewards_train/1-w": 2.667257308959961, "rewards_train/2-2": 2.8180551528930664, "rewards_train/2-w": -1.4724867343902588, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.419060230255127, "rewards_train/margins_1": 5.18149471282959, "rewards_train/margins_2": 4.290541887283325, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -158.50840759277344, "logps_train/policy_1_l": -143.5064697265625, "logps_train/policy_1_w": -91.50394439697266, "logps_train/policy_2_2": -104.25326538085938, "logps_train/policy_2_w": -155.81668090820312, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.0227159261703491, "rewards_train/1-l": -2.4918580055236816, "rewards_train/1-w": 2.5300745964050293, "rewards_train/2-2": 2.472330093383789, "rewards_train/2-w": -1.5510046482086182, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.021932601928711, "rewards_train/margins_1": 3.5527905225753784, "rewards_train/margins_2": 4.023334741592407, "step": 601 }, { "epoch": 1.8, "logps_train/policy_1_2": -261.9244384765625, "logps_train/policy_1_l": -182.7598876953125, "logps_train/policy_1_w": -126.1699447631836, "logps_train/policy_2_2": -165.10879516601562, "logps_train/policy_2_w": -200.6526336669922, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -205.0, "logps_train/ref_2_w": -185.0, "rewards_train/1-2": -2.738144874572754, "rewards_train/1-l": -2.5903453826904297, "rewards_train/1-w": 3.0975570678710938, "rewards_train/2-2": 3.9930267333984375, "rewards_train/2-w": -1.5873833894729614, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.687902450561523, "rewards_train/margins_1": 5.835701942443848, "rewards_train/margins_2": 5.580410122871399, "step": 601 }, { "epoch": 1.8, "learning_rate": 1.3970960103457722e-07, "loss": 0.5225, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -211.45924377441406, "logps_train/policy_1_l": -182.39715576171875, "logps_train/policy_1_w": -110.12702941894531, "logps_train/policy_2_2": -132.4916534423828, "logps_train/policy_2_w": -186.76446533203125, "logps_train/ref_1_2": -190.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -2.159986972808838, "rewards_train/1-l": -1.883954405784607, "rewards_train/1-w": 3.0966720581054688, "rewards_train/2-2": 3.0191943645477295, "rewards_train/2-w": -1.29988431930542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.980626463890076, "rewards_train/margins_1": 5.256659030914307, "rewards_train/margins_2": 4.319078683853149, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -163.07418823242188, "logps_train/policy_1_l": -179.34860229492188, "logps_train/policy_1_w": -106.32838439941406, "logps_train/policy_2_2": -109.09549713134766, "logps_train/policy_2_w": -168.9098358154297, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -150.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.2072234153747559, "rewards_train/1-l": -2.934568405151367, "rewards_train/1-w": 2.909691333770752, "rewards_train/2-2": 2.7519736289978027, "rewards_train/2-w": -1.055436611175537, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.844259738922119, "rewards_train/margins_1": 4.116914749145508, "rewards_train/margins_2": 3.80741024017334, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -171.7248077392578, "logps_train/policy_1_l": -182.9955291748047, "logps_train/policy_1_w": -120.7393798828125, "logps_train/policy_2_2": -101.23805236816406, "logps_train/policy_2_w": -198.7470703125, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -127.5, "logps_train/ref_2_w": -175.0, "rewards_train/1-2": -1.8927936553955078, "rewards_train/1-l": -2.305119276046753, "rewards_train/1-w": 2.977233648300171, "rewards_train/2-2": 2.6472883224487305, "rewards_train/2-w": -2.3602547645568848, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.282352924346924, "rewards_train/margins_1": 4.870027303695679, "rewards_train/margins_2": 5.007543087005615, "step": 602 }, { "epoch": 1.8, "logps_train/policy_1_2": -246.0342559814453, "logps_train/policy_1_l": -262.8284912109375, "logps_train/policy_1_w": -174.63418579101562, "logps_train/policy_2_2": -153.64410400390625, "logps_train/policy_2_w": -262.63580322265625, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -232.0, "logps_train/ref_1_w": -210.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -245.0, "rewards_train/1-2": -2.2213940620422363, "rewards_train/1-l": -3.05145001411438, "rewards_train/1-w": 3.505331039428711, "rewards_train/2-2": 3.9107859134674072, "rewards_train/2-w": -1.7963933944702148, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.556781053543091, "rewards_train/margins_1": 5.726725101470947, "rewards_train/margins_2": 5.707179307937622, "step": 602 }, { "epoch": 1.81, "logps_train/policy_1_2": -197.97076416015625, "logps_train/policy_1_l": -151.82000732421875, "logps_train/policy_1_w": -118.2177963256836, "logps_train/policy_2_2": -133.35305786132812, "logps_train/policy_2_w": -191.3159637451172, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.4705133438110352, "rewards_train/1-l": -2.0514345169067383, "rewards_train/1-w": 3.1922829151153564, "rewards_train/2-2": 2.824850559234619, "rewards_train/2-w": -1.5917526483535767, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.243717432022095, "rewards_train/margins_1": 4.662796258926392, "rewards_train/margins_2": 4.416603207588196, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -167.99822998046875, "logps_train/policy_1_l": -172.59750366210938, "logps_train/policy_1_w": -140.5294647216797, "logps_train/policy_2_2": -105.76437377929688, "logps_train/policy_2_w": -211.80630493164062, "logps_train/ref_1_2": -157.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.0728697776794434, "rewards_train/1-l": -1.8361170291900635, "rewards_train/1-w": 3.227229118347168, "rewards_train/2-2": 2.7020785808563232, "rewards_train/2-w": -1.4387609958648682, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.0633461475372314, "rewards_train/margins_1": 4.300098896026611, "rewards_train/margins_2": 4.140839576721191, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -124.51525115966797, "logps_train/policy_1_l": -131.10386657714844, "logps_train/policy_1_w": -97.6560287475586, "logps_train/policy_2_2": -75.57919311523438, "logps_train/policy_2_w": -163.93771362304688, "logps_train/ref_1_2": -109.0, "logps_train/ref_1_l": -112.0, "logps_train/ref_1_w": -122.5, "logps_train/ref_2_2": -93.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.5683221817016602, "rewards_train/1-l": -1.8889265060424805, "rewards_train/1-w": 2.4817357063293457, "rewards_train/2-2": 1.797353982925415, "rewards_train/2-w": -1.9997279644012451, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.370662212371826, "rewards_train/margins_1": 4.050057888031006, "rewards_train/margins_2": 3.79708194732666, "step": 603 }, { "epoch": 1.81, "logps_train/policy_1_2": -139.77810668945312, "logps_train/policy_1_l": -172.2023468017578, "logps_train/policy_1_w": -102.33419799804688, "logps_train/policy_2_2": -98.35594177246094, "logps_train/policy_2_w": -151.39495849609375, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -128.0, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.8847431540489197, "rewards_train/1-l": -1.8874222040176392, "rewards_train/1-w": 2.6226344108581543, "rewards_train/2-2": 1.99907386302948, "rewards_train/2-w": -0.9914490580558777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.5100566148757935, "rewards_train/margins_1": 3.507377564907074, "rewards_train/margins_2": 2.9905229210853577, "step": 603 }, { "epoch": 1.81, "learning_rate": 1.3168411536452153e-07, "loss": 0.5975, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -149.99343872070312, "logps_train/policy_1_l": -137.8846435546875, "logps_train/policy_1_w": -93.31626892089844, "logps_train/policy_2_2": -103.16496276855469, "logps_train/policy_2_w": -155.10528564453125, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -121.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.0774697065353394, "rewards_train/1-l": -1.8350460529327393, "rewards_train/1-w": 2.8047006130218506, "rewards_train/2-2": 1.9965898990631104, "rewards_train/2-w": -1.1558406352996826, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.63974666595459, "rewards_train/margins_1": 3.88217031955719, "rewards_train/margins_2": 3.152430534362793, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -252.918701171875, "logps_train/policy_1_l": -223.7437744140625, "logps_train/policy_1_w": -139.35874938964844, "logps_train/policy_2_2": -152.69566345214844, "logps_train/policy_2_w": -233.13214111328125, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -196.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -2.69968318939209, "rewards_train/1-l": -2.9075798988342285, "rewards_train/1-w": 3.567641019821167, "rewards_train/2-2": 4.31324577331543, "rewards_train/2-w": -2.6749320030212402, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.4752209186553955, "rewards_train/margins_1": 6.267324209213257, "rewards_train/margins_2": 6.98817777633667, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -158.37234497070312, "logps_train/policy_1_l": -218.18362426757812, "logps_train/policy_1_w": -129.4832305908203, "logps_train/policy_2_2": -99.64097595214844, "logps_train/policy_2_w": -200.84378051757812, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -187.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -0.9980748295783997, "rewards_train/1-l": -3.111527442932129, "rewards_train/1-w": 3.0973801612854004, "rewards_train/2-2": 2.7054340839385986, "rewards_train/2-w": -1.6546905040740967, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.208907604217529, "rewards_train/margins_1": 4.0954549908638, "rewards_train/margins_2": 4.360124588012695, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -211.33494567871094, "logps_train/policy_1_l": -181.043701171875, "logps_train/policy_1_w": -142.81304931640625, "logps_train/policy_2_2": -134.986328125, "logps_train/policy_2_w": -231.0360107421875, "logps_train/ref_1_2": -188.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -2.3014636039733887, "rewards_train/1-l": -2.327220916748047, "rewards_train/1-w": 3.5761172771453857, "rewards_train/2-2": 2.852931261062622, "rewards_train/2-w": -1.9700067043304443, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.903338193893433, "rewards_train/margins_1": 5.877580881118774, "rewards_train/margins_2": 4.822937965393066, "step": 604 }, { "epoch": 1.81, "logps_train/policy_1_2": -249.30801391601562, "logps_train/policy_1_l": -263.91778564453125, "logps_train/policy_1_w": -152.7130126953125, "logps_train/policy_2_2": -165.82476806640625, "logps_train/policy_2_w": -233.35577392578125, "logps_train/ref_1_2": -232.0, "logps_train/ref_1_l": -236.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -200.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.772306203842163, "rewards_train/1-l": -2.7955379486083984, "rewards_train/1-w": 3.681433916091919, "rewards_train/2-2": 3.4582467079162598, "rewards_train/2-w": -1.522686243057251, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.476971864700317, "rewards_train/margins_1": 5.453740119934082, "rewards_train/margins_2": 4.980932950973511, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -155.39312744140625, "logps_train/policy_1_l": -125.6500473022461, "logps_train/policy_1_w": -101.68556213378906, "logps_train/policy_2_2": -104.48683166503906, "logps_train/policy_2_w": -160.97093200683594, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -107.5, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -127.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.1796448230743408, "rewards_train/1-l": -1.8282369375228882, "rewards_train/1-w": 2.9125959873199463, "rewards_train/2-2": 2.256223678588867, "rewards_train/2-w": -1.2297838926315308, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.7408329248428345, "rewards_train/margins_1": 4.092240810394287, "rewards_train/margins_2": 3.486007571220398, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -114.7690658569336, "logps_train/policy_1_l": -138.2008056640625, "logps_train/policy_1_w": -131.93858337402344, "logps_train/policy_2_2": -62.493980407714844, "logps_train/policy_2_w": -196.30282592773438, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -85.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.0602082014083862, "rewards_train/1-l": -1.322912573814392, "rewards_train/1-w": 3.2389540672302246, "rewards_train/2-2": 2.3098180294036865, "rewards_train/2-w": -1.4654384851455688, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.561866641044617, "rewards_train/margins_1": 4.299162268638611, "rewards_train/margins_2": 3.7752565145492554, "step": 605 }, { "epoch": 1.81, "logps_train/policy_1_2": -172.30331420898438, "logps_train/policy_1_l": -141.332275390625, "logps_train/policy_1_w": -96.10222625732422, "logps_train/policy_2_2": -110.91793823242188, "logps_train/policy_2_w": -154.75758361816406, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -125.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -137.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.6029870510101318, "rewards_train/1-l": -1.6128898859024048, "rewards_train/1-w": 2.905597686767578, "rewards_train/2-2": 2.644143581390381, "rewards_train/2-w": -0.7159931659698486, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.518487572669983, "rewards_train/margins_1": 4.50858473777771, "rewards_train/margins_2": 3.3601367473602295, "step": 605 }, { "epoch": 1.81, "learning_rate": 1.238897731497224e-07, "loss": 0.5102, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -164.9368896484375, "logps_train/policy_1_l": -162.60763549804688, "logps_train/policy_1_w": -140.00457763671875, "logps_train/policy_2_2": -97.4139633178711, "logps_train/policy_2_w": -225.83905029296875, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -117.5, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -2.08939266204834, "rewards_train/1-l": -2.2813286781311035, "rewards_train/1-w": 3.071220874786377, "rewards_train/2-2": 2.001492500305176, "rewards_train/2-w": -2.283514976501465, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.3525495529174805, "rewards_train/margins_1": 5.160613536834717, "rewards_train/margins_2": 4.285007476806641, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -173.54446411132812, "logps_train/policy_1_l": -181.84628295898438, "logps_train/policy_1_w": -120.63615417480469, "logps_train/policy_2_2": -118.96033477783203, "logps_train/policy_2_w": -172.51992797851562, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -0.8563991189002991, "rewards_train/1-l": -2.470273017883301, "rewards_train/1-w": 2.7514235973358154, "rewards_train/2-2": 3.134239673614502, "rewards_train/2-w": -0.6016013026237488, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.221696615219116, "rewards_train/margins_1": 3.6078227162361145, "rewards_train/margins_2": 3.7358409762382507, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -204.6474609375, "logps_train/policy_1_l": -160.32302856445312, "logps_train/policy_1_w": -102.5252456665039, "logps_train/policy_2_2": -134.45899963378906, "logps_train/policy_2_w": -182.068115234375, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.7999016046524048, "rewards_train/1-l": -2.182889699935913, "rewards_train/1-w": 2.9545071125030518, "rewards_train/2-2": 2.9861316680908203, "rewards_train/2-w": -1.8993899822235107, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.137396812438965, "rewards_train/margins_1": 4.7544087171554565, "rewards_train/margins_2": 4.885521650314331, "step": 606 }, { "epoch": 1.81, "logps_train/policy_1_2": -137.74415588378906, "logps_train/policy_1_l": -122.17554473876953, "logps_train/policy_1_w": -110.72119140625, "logps_train/policy_2_2": -73.43515014648438, "logps_train/policy_2_w": -192.90907287597656, "logps_train/ref_1_2": -118.0, "logps_train/ref_1_l": -99.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -94.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.9869152307510376, "rewards_train/1-l": -2.2448983192443848, "rewards_train/1-w": 3.100659132003784, "rewards_train/2-2": 2.078165054321289, "rewards_train/2-w": -2.470594882965088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.345557451248169, "rewards_train/margins_1": 5.087574362754822, "rewards_train/margins_2": 4.548759937286377, "step": 606 }, { "epoch": 1.82, "logps_train/policy_1_2": -182.55819702148438, "logps_train/policy_1_l": -137.08746337890625, "logps_train/policy_1_w": -112.64253997802734, "logps_train/policy_2_2": -117.96548461914062, "logps_train/policy_2_w": -170.64959716796875, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -118.5, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.6903889179229736, "rewards_train/1-l": -1.865765929222107, "rewards_train/1-w": 3.004300117492676, "rewards_train/2-2": 2.5934901237487793, "rewards_train/2-w": -0.9497252702713013, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.870066046714783, "rewards_train/margins_1": 4.694689035415649, "rewards_train/margins_2": 3.5432153940200806, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -199.89825439453125, "logps_train/policy_1_l": -145.19239807128906, "logps_train/policy_1_w": -83.15714263916016, "logps_train/policy_2_2": -128.3160400390625, "logps_train/policy_2_w": -132.51168823242188, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -123.5, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": -2.6331849098205566, "rewards_train/1-l": -2.1411149501800537, "rewards_train/1-w": 1.7846766710281372, "rewards_train/2-2": 2.3896851539611816, "rewards_train/2-w": -1.487497091293335, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.925791621208191, "rewards_train/margins_1": 4.417861580848694, "rewards_train/margins_2": 3.8771822452545166, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -217.08505249023438, "logps_train/policy_1_l": -252.41131591796875, "logps_train/policy_1_w": -173.5054931640625, "logps_train/policy_2_2": -151.93829345703125, "logps_train/policy_2_w": -241.0811767578125, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -226.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": -0.8499101400375366, "rewards_train/1-l": -2.7133963108062744, "rewards_train/1-w": 3.9658570289611816, "rewards_train/2-2": 3.5678887367248535, "rewards_train/2-w": -0.7085083723068237, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.679253339767456, "rewards_train/margins_1": 4.815767168998718, "rewards_train/margins_2": 4.276397109031677, "step": 607 }, { "epoch": 1.82, "logps_train/policy_1_2": -159.75181579589844, "logps_train/policy_1_l": -168.45077514648438, "logps_train/policy_1_w": -102.32413482666016, "logps_train/policy_2_2": -105.07658386230469, "logps_train/policy_2_w": -155.0506591796875, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.2083849906921387, "rewards_train/1-l": -2.9665796756744385, "rewards_train/1-w": 2.6593832969665527, "rewards_train/2-2": 2.732771396636963, "rewards_train/2-w": -0.8566282391548157, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.625962972640991, "rewards_train/margins_1": 3.8677682876586914, "rewards_train/margins_2": 3.5893996357917786, "step": 607 }, { "epoch": 1.82, "learning_rate": 1.1632733510423932e-07, "loss": 0.5678, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -200.81829833984375, "logps_train/policy_1_l": -211.69117736816406, "logps_train/policy_1_w": -123.69796752929688, "logps_train/policy_2_2": -132.65008544921875, "logps_train/policy_2_w": -196.98594665527344, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -190.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.6299508810043335, "rewards_train/1-l": -2.2113780975341797, "rewards_train/1-w": 2.9114527702331543, "rewards_train/2-2": 2.675666570663452, "rewards_train/2-w": -1.995469331741333, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.122830867767334, "rewards_train/margins_1": 4.541403651237488, "rewards_train/margins_2": 4.671135902404785, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -95.25465393066406, "logps_train/policy_1_l": -123.89138793945312, "logps_train/policy_1_w": -77.98865509033203, "logps_train/policy_2_2": -61.08229446411133, "logps_train/policy_2_w": -140.7001495361328, "logps_train/ref_1_2": -87.5, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -99.5, "logps_train/ref_2_2": -77.0, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": -0.7953876256942749, "rewards_train/1-l": -1.9225369691848755, "rewards_train/1-w": 2.133312463760376, "rewards_train/2-2": 1.5712629556655884, "rewards_train/2-w": -2.1123971939086914, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.0558494329452515, "rewards_train/margins_1": 2.928700089454651, "rewards_train/margins_2": 3.68366014957428, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -176.1535186767578, "logps_train/policy_1_l": -174.22830200195312, "logps_train/policy_1_w": -93.08998107910156, "logps_train/policy_2_2": -102.16227722167969, "logps_train/policy_2_w": -156.29595947265625, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -2.004218578338623, "rewards_train/1-l": -2.7056431770324707, "rewards_train/1-w": 2.410874843597412, "rewards_train/2-2": 2.6283037662506104, "rewards_train/2-w": -1.6132872104644775, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.116518020629883, "rewards_train/margins_1": 4.415093421936035, "rewards_train/margins_2": 4.241590976715088, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -129.68467712402344, "logps_train/policy_1_l": -132.51211547851562, "logps_train/policy_1_w": -96.29170989990234, "logps_train/policy_2_2": -82.36114501953125, "logps_train/policy_2_w": -150.01878356933594, "logps_train/ref_1_2": -120.0, "logps_train/ref_1_l": -116.0, "logps_train/ref_1_w": -121.5, "logps_train/ref_2_2": -105.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -0.9885854125022888, "rewards_train/1-l": -1.6640294790267944, "rewards_train/1-w": 2.5552773475646973, "rewards_train/2-2": 2.2410330772399902, "rewards_train/2-w": -0.8724837303161621, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.219306826591492, "rewards_train/margins_1": 3.543862760066986, "rewards_train/margins_2": 3.1135168075561523, "step": 608 }, { "epoch": 1.82, "logps_train/policy_1_2": -201.80935668945312, "logps_train/policy_1_l": -189.25588989257812, "logps_train/policy_1_w": -109.22311401367188, "logps_train/policy_2_2": -125.61198425292969, "logps_train/policy_2_w": -178.2271270751953, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -2.418436050415039, "rewards_train/1-l": -3.014822483062744, "rewards_train/1-w": 2.6415557861328125, "rewards_train/2-2": 2.9930992126464844, "rewards_train/2-w": -1.5801339149475098, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.656378269195557, "rewards_train/margins_1": 5.059991836547852, "rewards_train/margins_2": 4.573233127593994, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -156.44580078125, "logps_train/policy_1_l": -203.66868591308594, "logps_train/policy_1_w": -96.27999114990234, "logps_train/policy_2_2": -107.14651489257812, "logps_train/policy_2_w": -152.93209838867188, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -175.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.0254390239715576, "rewards_train/1-l": -2.8278918266296387, "rewards_train/1-w": 2.4244542121887207, "rewards_train/2-2": 2.2270472049713135, "rewards_train/2-w": -1.045487403869629, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.252346038818359, "rewards_train/margins_1": 3.4498932361602783, "rewards_train/margins_2": 3.2725346088409424, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -208.91217041015625, "logps_train/policy_1_l": -216.06417846679688, "logps_train/policy_1_w": -154.15386962890625, "logps_train/policy_2_2": -133.65049743652344, "logps_train/policy_2_w": -244.92013549804688, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": -1.5470772981643677, "rewards_train/1-l": -3.443918228149414, "rewards_train/1-w": 3.660393714904785, "rewards_train/2-2": 3.2130751609802246, "rewards_train/2-w": -2.142014741897583, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.104311943054199, "rewards_train/margins_1": 5.207471013069153, "rewards_train/margins_2": 5.355089902877808, "step": 609 }, { "epoch": 1.82, "logps_train/policy_1_2": -108.71490478515625, "logps_train/policy_1_l": -81.05575561523438, "logps_train/policy_1_w": -78.11407470703125, "logps_train/policy_2_2": -69.13420867919922, "logps_train/policy_2_w": -117.19157409667969, "logps_train/ref_1_2": -104.0, "logps_train/ref_1_l": -63.75, "logps_train/ref_1_w": -106.5, "logps_train/ref_2_2": -92.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": -0.5121153593063354, "rewards_train/1-l": -1.721200704574585, "rewards_train/1-w": 2.862323760986328, "rewards_train/2-2": 2.2869696617126465, "rewards_train/2-w": 0.02263960987329483, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.583524465560913, "rewards_train/margins_1": 3.3744391202926636, "rewards_train/margins_2": 2.2643300518393517, "step": 609 }, { "epoch": 1.83, "learning_rate": 1.0899753930869395e-07, "loss": 0.645, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -222.31472778320312, "logps_train/policy_1_l": -191.7534942626953, "logps_train/policy_1_w": -134.69439697265625, "logps_train/policy_2_2": -146.0612335205078, "logps_train/policy_2_w": -205.79708862304688, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -165.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -2.2220964431762695, "rewards_train/1-l": -2.371443271636963, "rewards_train/1-w": 3.036613941192627, "rewards_train/2-2": 3.3155081272125244, "rewards_train/2-w": -1.4746290445327759, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.40805721282959, "rewards_train/margins_1": 5.2587103843688965, "rewards_train/margins_2": 4.7901371717453, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -140.7149200439453, "logps_train/policy_1_l": -106.12677001953125, "logps_train/policy_1_w": -71.29432678222656, "logps_train/policy_2_2": -96.22331237792969, "logps_train/policy_2_w": -119.2965087890625, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -93.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -118.0, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": -0.6517641544342041, "rewards_train/1-l": -1.3107234239578247, "rewards_train/1-w": 2.426915168762207, "rewards_train/2-2": 2.2185380458831787, "rewards_train/2-w": -0.9858031272888184, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.7376385927200317, "rewards_train/margins_1": 3.078679323196411, "rewards_train/margins_2": 3.204341173171997, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -224.09320068359375, "logps_train/policy_1_l": -176.55450439453125, "logps_train/policy_1_w": -122.5115966796875, "logps_train/policy_2_2": -128.6203155517578, "logps_train/policy_2_w": -202.58499145507812, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -2.4801223278045654, "rewards_train/1-l": -2.1525216102600098, "rewards_train/1-w": 2.81466007232666, "rewards_train/2-2": 3.211796760559082, "rewards_train/2-w": -2.0598652362823486, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.96718168258667, "rewards_train/margins_1": 5.294782400131226, "rewards_train/margins_2": 5.271661996841431, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -165.4060821533203, "logps_train/policy_1_l": -169.57630920410156, "logps_train/policy_1_w": -102.0796890258789, "logps_train/policy_2_2": -100.06546020507812, "logps_train/policy_2_w": -166.886962890625, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -126.0, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -1.886702060699463, "rewards_train/1-l": -2.0840349197387695, "rewards_train/1-w": 2.4119653701782227, "rewards_train/2-2": 2.553610324859619, "rewards_train/2-w": -1.7337154150009155, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.496000289916992, "rewards_train/margins_1": 4.2986674308776855, "rewards_train/margins_2": 4.287325739860535, "step": 610 }, { "epoch": 1.83, "logps_train/policy_1_2": -164.05514526367188, "logps_train/policy_1_l": -144.10240173339844, "logps_train/policy_1_w": -100.1552734375, "logps_train/policy_2_2": -112.34978485107422, "logps_train/policy_2_w": -155.743408203125, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -146.0, "rewards_train/1-2": -1.088327169418335, "rewards_train/1-l": -2.0037941932678223, "rewards_train/1-w": 2.9122068881988525, "rewards_train/2-2": 2.8468575477600098, "rewards_train/2-w": -0.9055911898612976, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.916001081466675, "rewards_train/margins_1": 4.0005340576171875, "rewards_train/margins_2": 3.7524487376213074, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -265.5379943847656, "logps_train/policy_1_l": -229.70982360839844, "logps_train/policy_1_w": -130.71249389648438, "logps_train/policy_2_2": -172.88404846191406, "logps_train/policy_2_w": -211.05397033691406, "logps_train/ref_1_2": -241.0, "logps_train/ref_1_l": -205.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -2.5116114616394043, "rewards_train/1-l": -2.473130702972412, "rewards_train/1-w": 3.150625705718994, "rewards_train/2-2": 3.6666736602783203, "rewards_train/2-w": -1.993678331375122, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.623756408691406, "rewards_train/margins_1": 5.662237167358398, "rewards_train/margins_2": 5.660351991653442, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -133.73245239257812, "logps_train/policy_1_l": -109.24142456054688, "logps_train/policy_1_w": -74.32089233398438, "logps_train/policy_2_2": -71.59793090820312, "logps_train/policy_2_w": -130.3299560546875, "logps_train/ref_1_2": -111.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -94.0, "logps_train/ref_2_2": -93.0, "logps_train/ref_2_w": -112.5, "rewards_train/1-2": -2.237306594848633, "rewards_train/1-l": -1.9513518810272217, "rewards_train/1-w": 1.9450585842132568, "rewards_train/2-2": 2.114035129547119, "rewards_train/2-w": -1.78533935546875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8964104652404785, "rewards_train/margins_1": 4.18236517906189, "rewards_train/margins_2": 3.899374485015869, "step": 611 }, { "epoch": 1.83, "logps_train/policy_1_2": -208.09552001953125, "logps_train/policy_1_l": -171.92361450195312, "logps_train/policy_1_w": -138.52024841308594, "logps_train/policy_2_2": -130.23532104492188, "logps_train/policy_2_w": -216.49874877929688, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -2.34236478805542, "rewards_train/1-l": -2.3951926231384277, "rewards_train/1-w": 3.6065690517425537, "rewards_train/2-2": 2.902249336242676, "rewards_train/2-w": -1.59088933467865, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 6.0017616748809814, "rewards_train/margins_1": 5.948933839797974, "rewards_train/margins_2": 4.493138670921326, "step": 611 }, { "epoch": 1.83, "learning_rate": 1.0190110113823426e-07, "loss": 0.5608, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -172.0113525390625, "logps_train/policy_1_l": -117.87105560302734, "logps_train/policy_1_w": -113.97248840332031, "logps_train/policy_2_2": -107.77720642089844, "logps_train/policy_2_w": -181.1367645263672, "logps_train/ref_1_2": -154.0, "logps_train/ref_1_l": -102.5, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.7859015464782715, "rewards_train/1-l": -1.5105911493301392, "rewards_train/1-w": 2.770719528198242, "rewards_train/2-2": 2.6744275093078613, "rewards_train/2-w": -1.5668013095855713, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.281310677528381, "rewards_train/margins_1": 4.556621074676514, "rewards_train/margins_2": 4.241228818893433, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -237.142333984375, "logps_train/policy_1_l": -183.92579650878906, "logps_train/policy_1_w": -142.0514678955078, "logps_train/policy_2_2": -153.1236572265625, "logps_train/policy_2_w": -217.36767578125, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -2.3659443855285645, "rewards_train/1-l": -2.757716655731201, "rewards_train/1-w": 3.179227828979492, "rewards_train/2-2": 3.485729694366455, "rewards_train/2-w": -1.703565001487732, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.936944484710693, "rewards_train/margins_1": 5.545172214508057, "rewards_train/margins_2": 5.189294695854187, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -218.72576904296875, "logps_train/policy_1_l": -191.15374755859375, "logps_train/policy_1_w": -147.701904296875, "logps_train/policy_2_2": -127.71056365966797, "logps_train/policy_2_w": -256.6673278808594, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -229.0, "rewards_train/1-2": -2.6499199867248535, "rewards_train/1-l": -2.162640333175659, "rewards_train/1-w": 3.6758551597595215, "rewards_train/2-2": 3.1246471405029297, "rewards_train/2-w": -2.834458351135254, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.838495492935181, "rewards_train/margins_1": 6.325775146484375, "rewards_train/margins_2": 5.959105491638184, "step": 612 }, { "epoch": 1.83, "logps_train/policy_1_2": -207.8614959716797, "logps_train/policy_1_l": -169.01914978027344, "logps_train/policy_1_w": -115.35594940185547, "logps_train/policy_2_2": -120.9159164428711, "logps_train/policy_2_w": -192.7962646484375, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -2.508805990219116, "rewards_train/1-l": -2.7318949699401855, "rewards_train/1-w": 3.069483518600464, "rewards_train/2-2": 2.995126962661743, "rewards_train/2-w": -1.8946642875671387, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.801378488540649, "rewards_train/margins_1": 5.57828950881958, "rewards_train/margins_2": 4.889791250228882, "step": 612 }, { "epoch": 1.84, "logps_train/policy_1_2": -197.07186889648438, "logps_train/policy_1_l": -175.71148681640625, "logps_train/policy_1_w": -127.44302368164062, "logps_train/policy_2_2": -123.69660949707031, "logps_train/policy_2_w": -197.18833923339844, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -2.3200793266296387, "rewards_train/1-l": -2.2993717193603516, "rewards_train/1-w": 2.727278709411621, "rewards_train/2-2": 2.649186849594116, "rewards_train/2-w": -1.8956882953643799, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 5.026650428771973, "rewards_train/margins_1": 5.04735803604126, "rewards_train/margins_2": 4.544875144958496, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -155.95350646972656, "logps_train/policy_1_l": -151.3316650390625, "logps_train/policy_1_w": -70.18510437011719, "logps_train/policy_2_2": -98.6446533203125, "logps_train/policy_2_w": -121.31182861328125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -95.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -113.5, "rewards_train/1-2": -1.0969126224517822, "rewards_train/1-l": -2.8432488441467285, "rewards_train/1-w": 2.4978952407836914, "rewards_train/2-2": 2.92303466796875, "rewards_train/2-w": -0.8233706951141357, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.34114408493042, "rewards_train/margins_1": 3.5948078632354736, "rewards_train/margins_2": 3.7464053630828857, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -242.47364807128906, "logps_train/policy_1_l": -218.21701049804688, "logps_train/policy_1_w": -144.41717529296875, "logps_train/policy_2_2": -169.53677368164062, "logps_train/policy_2_w": -218.8023681640625, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -193.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -210.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -0.8075222969055176, "rewards_train/1-l": -2.4770734310150146, "rewards_train/1-w": 3.858673095703125, "rewards_train/2-2": 4.001010894775391, "rewards_train/2-w": -0.7200797200202942, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.33574652671814, "rewards_train/margins_1": 4.666195392608643, "rewards_train/margins_2": 4.721090614795685, "step": 613 }, { "epoch": 1.84, "logps_train/policy_1_2": -218.67437744140625, "logps_train/policy_1_l": -185.74693298339844, "logps_train/policy_1_w": -127.14219665527344, "logps_train/policy_2_2": -148.64627075195312, "logps_train/policy_2_w": -212.3238067626953, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -159.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -190.0, "rewards_train/1-2": -1.8593811988830566, "rewards_train/1-l": -3.031235933303833, "rewards_train/1-w": 3.2178120613098145, "rewards_train/2-2": 2.654391288757324, "rewards_train/2-w": -2.221442937850952, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.2490479946136475, "rewards_train/margins_1": 5.077193260192871, "rewards_train/margins_2": 4.875834226608276, "step": 613 }, { "epoch": 1.84, "learning_rate": 9.503871319271552e-08, "loss": 0.5061, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -181.99420166015625, "logps_train/policy_1_l": -181.42440795898438, "logps_train/policy_1_w": -124.92957305908203, "logps_train/policy_2_2": -111.84329223632812, "logps_train/policy_2_w": -199.75942993164062, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.8931446075439453, "rewards_train/1-l": -2.876889705657959, "rewards_train/1-w": 3.1534295082092285, "rewards_train/2-2": 2.8520967960357666, "rewards_train/2-w": -1.8214514255523682, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.0303192138671875, "rewards_train/margins_1": 5.046574115753174, "rewards_train/margins_2": 4.673548221588135, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -183.65237426757812, "logps_train/policy_1_l": -148.89468383789062, "logps_train/policy_1_w": -125.58230590820312, "logps_train/policy_2_2": -125.31564331054688, "logps_train/policy_2_w": -179.54080200195312, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -154.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -1.2109408378601074, "rewards_train/1-l": -1.789290428161621, "rewards_train/1-w": 2.732377767562866, "rewards_train/2-2": 2.8567168712615967, "rewards_train/2-w": -1.1156761646270752, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.521668195724487, "rewards_train/margins_1": 3.9433186054229736, "rewards_train/margins_2": 3.972393035888672, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -216.21583557128906, "logps_train/policy_1_l": -183.96775817871094, "logps_train/policy_1_w": -133.69775390625, "logps_train/policy_2_2": -154.76519775390625, "logps_train/policy_2_w": -204.30833435058594, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -0.827639102935791, "rewards_train/1-l": -2.3284168243408203, "rewards_train/1-w": 3.458350658416748, "rewards_train/2-2": 3.47660493850708, "rewards_train/2-w": -1.2602283954620361, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.786767482757568, "rewards_train/margins_1": 4.285989761352539, "rewards_train/margins_2": 4.736833333969116, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -201.22698974609375, "logps_train/policy_1_l": -154.36599731445312, "logps_train/policy_1_w": -137.89739990234375, "logps_train/policy_2_2": -128.51162719726562, "logps_train/policy_2_w": -222.61285400390625, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -2.2410600185394287, "rewards_train/1-l": -1.9362083673477173, "rewards_train/1-w": 3.487993001937866, "rewards_train/2-2": 2.6586012840270996, "rewards_train/2-w": -2.54136323928833, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.4242013692855835, "rewards_train/margins_1": 5.729053020477295, "rewards_train/margins_2": 5.19996452331543, "step": 614 }, { "epoch": 1.84, "logps_train/policy_1_2": -177.15316772460938, "logps_train/policy_1_l": -175.771728515625, "logps_train/policy_1_w": -126.04844665527344, "logps_train/policy_2_2": -104.71029663085938, "logps_train/policy_2_w": -194.8253173828125, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -1.4981296062469482, "rewards_train/1-l": -2.347388505935669, "rewards_train/1-w": 3.3603904247283936, "rewards_train/2-2": 3.390298366546631, "rewards_train/2-w": -1.196986198425293, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.7077789306640625, "rewards_train/margins_1": 4.858520030975342, "rewards_train/margins_2": 4.587284564971924, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -162.76174926757812, "logps_train/policy_1_l": -119.87336730957031, "logps_train/policy_1_w": -65.35426330566406, "logps_train/policy_2_2": -105.75543212890625, "logps_train/policy_2_w": -117.48685455322266, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -100.0, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -105.0, "rewards_train/1-2": -1.7322299480438232, "rewards_train/1-l": -1.9703203439712524, "rewards_train/1-w": 2.003830909729004, "rewards_train/2-2": 2.0129334926605225, "rewards_train/2-w": -1.2201694250106812, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.9741512537002563, "rewards_train/margins_1": 3.736060857772827, "rewards_train/margins_2": 3.2331029176712036, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -226.34552001953125, "logps_train/policy_1_l": -256.03631591796875, "logps_train/policy_1_w": -171.44435119628906, "logps_train/policy_2_2": -161.6170196533203, "logps_train/policy_2_w": -253.55039978027344, "logps_train/ref_1_2": -214.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -220.0, "logps_train/ref_2_2": -192.0, "logps_train/ref_2_w": -248.0, "rewards_train/1-2": -1.1798642873764038, "rewards_train/1-l": -4.057538986206055, "rewards_train/1-w": 4.846189498901367, "rewards_train/2-2": 3.1015796661376953, "rewards_train/2-w": -0.5421484112739563, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 8.903728485107422, "rewards_train/margins_1": 6.026053786277771, "rewards_train/margins_2": 3.6437280774116516, "step": 615 }, { "epoch": 1.84, "logps_train/policy_1_2": -205.08433532714844, "logps_train/policy_1_l": -220.19317626953125, "logps_train/policy_1_w": -133.4334716796875, "logps_train/policy_2_2": -139.84027099609375, "logps_train/policy_2_w": -198.34056091308594, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -0.8060892820358276, "rewards_train/1-l": -2.1995420455932617, "rewards_train/1-w": 3.407433032989502, "rewards_train/2-2": 3.6147994995117188, "rewards_train/2-w": -1.0889387130737305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.606975078582764, "rewards_train/margins_1": 4.21352231502533, "rewards_train/margins_2": 4.703738212585449, "step": 615 }, { "epoch": 1.84, "learning_rate": 8.841104522910343e-08, "loss": 0.8173, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -233.15650939941406, "logps_train/policy_1_l": -234.79071044921875, "logps_train/policy_1_w": -165.76226806640625, "logps_train/policy_2_2": -154.72268676757812, "logps_train/policy_2_w": -246.0529022216797, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -209.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -1.8011977672576904, "rewards_train/1-l": -2.583174228668213, "rewards_train/1-w": 3.911102294921875, "rewards_train/2-2": 3.194918632507324, "rewards_train/2-w": -1.5901765823364258, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.494276523590088, "rewards_train/margins_1": 5.712300062179565, "rewards_train/margins_2": 4.78509521484375, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -146.89938354492188, "logps_train/policy_1_l": -85.81741333007812, "logps_train/policy_1_w": -82.27237701416016, "logps_train/policy_2_2": -86.90000915527344, "logps_train/policy_2_w": -138.30001831054688, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -71.5, "logps_train/ref_1_w": -111.5, "logps_train/ref_2_2": -112.0, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -1.663766860961914, "rewards_train/1-l": -1.4158239364624023, "rewards_train/1-w": 2.9438562393188477, "rewards_train/2-2": 2.5293354988098145, "rewards_train/2-w": -1.1401588916778564, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.35968017578125, "rewards_train/margins_1": 4.607623100280762, "rewards_train/margins_2": 3.669494390487671, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -157.20462036132812, "logps_train/policy_1_l": -165.1308135986328, "logps_train/policy_1_w": -94.99164581298828, "logps_train/policy_2_2": -114.64027404785156, "logps_train/policy_2_w": -147.82337951660156, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -123.5, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -0.6026895046234131, "rewards_train/1-l": -2.226606845855713, "rewards_train/1-w": 2.8651909828186035, "rewards_train/2-2": 2.301598072052002, "rewards_train/2-w": -0.5963991284370422, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.091797828674316, "rewards_train/margins_1": 3.4678804874420166, "rewards_train/margins_2": 2.897997200489044, "step": 616 }, { "epoch": 1.84, "logps_train/policy_1_2": -204.8130645751953, "logps_train/policy_1_l": -188.7734832763672, "logps_train/policy_1_w": -164.2358856201172, "logps_train/policy_2_2": -119.48881530761719, "logps_train/policy_2_w": -255.61697387695312, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -204.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -2.0492749214172363, "rewards_train/1-l": -2.484379529953003, "rewards_train/1-w": 3.9484806060791016, "rewards_train/2-2": 3.359712600708008, "rewards_train/2-w": -2.4507596492767334, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.4328601360321045, "rewards_train/margins_1": 5.997755527496338, "rewards_train/margins_2": 5.810472249984741, "step": 616 }, { "epoch": 1.85, "logps_train/policy_1_2": -164.86447143554688, "logps_train/policy_1_l": -157.42173767089844, "logps_train/policy_1_w": -101.76870727539062, "logps_train/policy_2_2": -107.31314086914062, "logps_train/policy_2_w": -163.7881622314453, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -1.4133992195129395, "rewards_train/1-l": -2.2655129432678223, "rewards_train/1-w": 2.8542819023132324, "rewards_train/2-2": 2.572201728820801, "rewards_train/2-w": -1.188191533088684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.119794845581055, "rewards_train/margins_1": 4.267681121826172, "rewards_train/margins_2": 3.760393261909485, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -230.82119750976562, "logps_train/policy_1_l": -224.60800170898438, "logps_train/policy_1_w": -146.75601196289062, "logps_train/policy_2_2": -156.44509887695312, "logps_train/policy_2_w": -231.22052001953125, "logps_train/ref_1_2": -218.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -1.2590739727020264, "rewards_train/1-l": -2.8703694343566895, "rewards_train/1-w": 3.203256130218506, "rewards_train/2-2": 3.1549038887023926, "rewards_train/2-w": -1.80916166305542, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.073625564575195, "rewards_train/margins_1": 4.462330102920532, "rewards_train/margins_2": 4.9640655517578125, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -172.07870483398438, "logps_train/policy_1_l": -137.65512084960938, "logps_train/policy_1_w": -124.4388427734375, "logps_train/policy_2_2": -114.9569091796875, "logps_train/policy_2_w": -211.21914672851562, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -144.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -0.869632363319397, "rewards_train/1-l": -1.0623819828033447, "rewards_train/1-w": 3.740447759628296, "rewards_train/2-2": 2.839691638946533, "rewards_train/2-w": -1.6212003231048584, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.802829742431641, "rewards_train/margins_1": 4.610080122947693, "rewards_train/margins_2": 4.460891962051392, "step": 617 }, { "epoch": 1.85, "logps_train/policy_1_2": -174.796142578125, "logps_train/policy_1_l": -148.72955322265625, "logps_train/policy_1_w": -109.90263366699219, "logps_train/policy_2_2": -113.17196655273438, "logps_train/policy_2_w": -165.80882263183594, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -158.0, "rewards_train/1-2": -1.2030527591705322, "rewards_train/1-l": -1.725298523902893, "rewards_train/1-w": 3.0255579948425293, "rewards_train/2-2": 2.8841707706451416, "rewards_train/2-w": -0.7367410659790039, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.750856518745422, "rewards_train/margins_1": 4.2286107540130615, "rewards_train/margins_2": 3.6209118366241455, "step": 617 }, { "epoch": 1.85, "learning_rate": 8.201874409610732e-08, "loss": 0.5298, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -125.80625915527344, "logps_train/policy_1_l": -171.30027770996094, "logps_train/policy_1_w": -74.04481506347656, "logps_train/policy_2_2": -73.6275634765625, "logps_train/policy_2_w": -128.7239227294922, "logps_train/ref_1_2": -110.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -92.5, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": -1.5993757247924805, "rewards_train/1-l": -2.306394100189209, "rewards_train/1-w": 2.1543071269989014, "rewards_train/2-2": 1.8671265840530396, "rewards_train/2-w": -1.2184853553771973, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.46070122718811, "rewards_train/margins_1": 3.753682851791382, "rewards_train/margins_2": 3.085611939430237, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -201.37701416015625, "logps_train/policy_1_l": -107.0134506225586, "logps_train/policy_1_w": -102.95555114746094, "logps_train/policy_2_2": -135.85214233398438, "logps_train/policy_2_w": -148.68582153320312, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -92.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.4970765113830566, "rewards_train/1-l": -1.4370508193969727, "rewards_train/1-w": 3.0729990005493164, "rewards_train/2-2": 3.3140039443969727, "rewards_train/2-w": -0.10451848804950714, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.510049819946289, "rewards_train/margins_1": 4.570075511932373, "rewards_train/margins_2": 3.41852243244648, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -238.27064514160156, "logps_train/policy_1_l": -161.96270751953125, "logps_train/policy_1_w": -102.27105712890625, "logps_train/policy_2_2": -150.92198181152344, "logps_train/policy_2_w": -169.85690307617188, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -2.9512834548950195, "rewards_train/1-l": -1.4836735725402832, "rewards_train/1-w": 2.434124708175659, "rewards_train/2-2": 3.0214734077453613, "rewards_train/2-w": -1.9313929080963135, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9177982807159424, "rewards_train/margins_1": 5.385408163070679, "rewards_train/margins_2": 4.952866315841675, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -221.8403778076172, "logps_train/policy_1_l": -143.9396514892578, "logps_train/policy_1_w": -162.83999633789062, "logps_train/policy_2_2": -140.9486846923828, "logps_train/policy_2_w": -250.74818420410156, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -234.0, "rewards_train/1-2": -1.721927285194397, "rewards_train/1-l": -1.6287310123443604, "rewards_train/1-w": 3.7843589782714844, "rewards_train/2-2": 3.414897918701172, "rewards_train/2-w": -1.6923965215682983, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.413089990615845, "rewards_train/margins_1": 5.506286263465881, "rewards_train/margins_2": 5.10729444026947, "step": 618 }, { "epoch": 1.85, "logps_train/policy_1_2": -175.94158935546875, "logps_train/policy_1_l": -160.69967651367188, "logps_train/policy_1_w": -103.78031921386719, "logps_train/policy_2_2": -120.19720458984375, "logps_train/policy_2_w": -167.81594848632812, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.4074411392211914, "rewards_train/1-l": -1.778560996055603, "rewards_train/1-w": 2.946577548980713, "rewards_train/2-2": 2.4470763206481934, "rewards_train/2-w": -1.22300124168396, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.725138545036316, "rewards_train/margins_1": 4.354018688201904, "rewards_train/margins_2": 3.6700775623321533, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -177.434814453125, "logps_train/policy_1_l": -160.83860778808594, "logps_train/policy_1_w": -140.33119201660156, "logps_train/policy_2_2": -112.75822448730469, "logps_train/policy_2_w": -218.43946838378906, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -200.0, "rewards_train/1-2": -1.5130136013031006, "rewards_train/1-l": -2.330930709838867, "rewards_train/1-w": 3.442857265472412, "rewards_train/2-2": 2.7390213012695312, "rewards_train/2-w": -1.874415397644043, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.773787975311279, "rewards_train/margins_1": 4.955870866775513, "rewards_train/margins_2": 4.613436698913574, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -130.50201416015625, "logps_train/policy_1_l": -105.20661926269531, "logps_train/policy_1_w": -85.49009704589844, "logps_train/policy_2_2": -81.87364196777344, "logps_train/policy_2_w": -131.35617065429688, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -103.0, "logps_train/ref_2_w": -120.5, "rewards_train/1-2": -1.3271536827087402, "rewards_train/1-l": -2.0148510932922363, "rewards_train/1-w": 1.9545056819915771, "rewards_train/2-2": 2.147791862487793, "rewards_train/2-w": -1.1020221710205078, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.9693567752838135, "rewards_train/margins_1": 3.2816593647003174, "rewards_train/margins_2": 3.249814033508301, "step": 619 }, { "epoch": 1.85, "logps_train/policy_1_2": -174.015625, "logps_train/policy_1_l": -145.602294921875, "logps_train/policy_1_w": -116.43850708007812, "logps_train/policy_2_2": -112.2360610961914, "logps_train/policy_2_w": -172.22203063964844, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.3945308923721313, "rewards_train/1-l": -2.2434325218200684, "rewards_train/1-w": 2.669039726257324, "rewards_train/2-2": 2.6377224922180176, "rewards_train/2-w": -1.0793322324752808, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.912472248077393, "rewards_train/margins_1": 4.063570618629456, "rewards_train/margins_2": 3.7170547246932983, "step": 619 }, { "epoch": 1.86, "learning_rate": 7.586243367104895e-08, "loss": 0.5517, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -172.004150390625, "logps_train/policy_1_l": -159.48391723632812, "logps_train/policy_1_w": -119.75978088378906, "logps_train/policy_2_2": -109.98404693603516, "logps_train/policy_2_w": -201.01727294921875, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -138.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -1.3901116847991943, "rewards_train/1-l": -1.8611853122711182, "rewards_train/1-w": 3.235106945037842, "rewards_train/2-2": 2.7698564529418945, "rewards_train/2-w": -2.0022153854370117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.09629225730896, "rewards_train/margins_1": 4.625218629837036, "rewards_train/margins_2": 4.772071838378906, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -225.017822265625, "logps_train/policy_1_l": -173.32357788085938, "logps_train/policy_1_w": -170.1387481689453, "logps_train/policy_2_2": -152.19857788085938, "logps_train/policy_2_w": -251.6513671875, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -205.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.6486561298370361, "rewards_train/1-l": -2.034700870513916, "rewards_train/1-w": 3.5103437900543213, "rewards_train/2-2": 3.3676419258117676, "rewards_train/2-w": -2.0362298488616943, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.545044660568237, "rewards_train/margins_1": 5.158999919891357, "rewards_train/margins_2": 5.403871774673462, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -204.65322875976562, "logps_train/policy_1_l": -148.89715576171875, "logps_train/policy_1_w": -113.60370635986328, "logps_train/policy_2_2": -131.28549194335938, "logps_train/policy_2_w": -171.57608032226562, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -120.5, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -159.0, "rewards_train/1-2": -2.1660561561584473, "rewards_train/1-l": -2.8518736362457275, "rewards_train/1-w": 2.635674476623535, "rewards_train/2-2": 2.9599289894104004, "rewards_train/2-w": -1.2527244091033936, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.487548112869263, "rewards_train/margins_1": 4.801730632781982, "rewards_train/margins_2": 4.212653398513794, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -142.49806213378906, "logps_train/policy_1_l": -120.19793701171875, "logps_train/policy_1_w": -93.91498565673828, "logps_train/policy_2_2": -86.33233642578125, "logps_train/policy_2_w": -149.02169799804688, "logps_train/ref_1_2": -126.5, "logps_train/ref_1_l": -102.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -1.590822696685791, "rewards_train/1-l": -1.7952821254730225, "rewards_train/1-w": 2.344634532928467, "rewards_train/2-2": 2.3159852027893066, "rewards_train/2-w": -1.4994350671768188, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.139916658401489, "rewards_train/margins_1": 3.935457229614258, "rewards_train/margins_2": 3.8154202699661255, "step": 620 }, { "epoch": 1.86, "logps_train/policy_1_2": -164.28057861328125, "logps_train/policy_1_l": -103.9769515991211, "logps_train/policy_1_w": -77.52155303955078, "logps_train/policy_2_2": -97.1369857788086, "logps_train/policy_2_w": -144.0299835205078, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -90.0, "logps_train/ref_1_w": -101.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -126.0, "rewards_train/1-2": -1.9116512537002563, "rewards_train/1-l": -1.4115616083145142, "rewards_train/1-w": 2.3416924476623535, "rewards_train/2-2": 2.335926055908203, "rewards_train/2-w": -1.8291704654693604, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.7532540559768677, "rewards_train/margins_1": 4.25334370136261, "rewards_train/margins_2": 4.1650965213775635, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -236.94491577148438, "logps_train/policy_1_l": -159.00094604492188, "logps_train/policy_1_w": -136.23439025878906, "logps_train/policy_2_2": -154.82577514648438, "logps_train/policy_2_w": -214.11587524414062, "logps_train/ref_1_2": -215.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -2.177500009536743, "rewards_train/1-l": -2.318990707397461, "rewards_train/1-w": 3.2099599838256836, "rewards_train/2-2": 3.488222122192383, "rewards_train/2-w": -1.7082679271697998, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.5289506912231445, "rewards_train/margins_1": 5.387459993362427, "rewards_train/margins_2": 5.196490049362183, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -178.95233154296875, "logps_train/policy_1_l": -159.75311279296875, "logps_train/policy_1_w": -105.83779907226562, "logps_train/policy_2_2": -119.4690933227539, "logps_train/policy_2_w": -154.0672149658203, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -136.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -151.0, "rewards_train/1-2": -0.8542178869247437, "rewards_train/1-l": -2.56779146194458, "rewards_train/1-w": 2.9720797538757324, "rewards_train/2-2": 2.973207712173462, "rewards_train/2-w": -0.3016436696052551, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.5398712158203125, "rewards_train/margins_1": 3.826297640800476, "rewards_train/margins_2": 3.274851381778717, "step": 621 }, { "epoch": 1.86, "logps_train/policy_1_2": -190.91421508789062, "logps_train/policy_1_l": -207.75286865234375, "logps_train/policy_1_w": -128.56089782714844, "logps_train/policy_2_2": -122.44009399414062, "logps_train/policy_2_w": -206.65679931640625, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.6347802877426147, "rewards_train/1-l": -2.5405211448669434, "rewards_train/1-w": 2.9517226219177246, "rewards_train/2-2": 2.809896945953369, "rewards_train/2-w": -2.0063042640686035, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.492243766784668, "rewards_train/margins_1": 4.586502909660339, "rewards_train/margins_2": 4.816201210021973, "step": 621 }, { "epoch": 1.86, "learning_rate": 6.994271479897313e-08, "loss": 0.4927, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -143.0115509033203, "logps_train/policy_1_l": -102.09441375732422, "logps_train/policy_1_w": -88.79969787597656, "logps_train/policy_2_2": -85.8750991821289, "logps_train/policy_2_w": -141.18775939941406, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -130.0, "rewards_train/1-2": -1.6871416568756104, "rewards_train/1-l": -1.0485522747039795, "rewards_train/1-w": 2.1948347091674805, "rewards_train/2-2": 2.135634422302246, "rewards_train/2-w": -1.0711199045181274, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.24338698387146, "rewards_train/margins_1": 3.881976366043091, "rewards_train/margins_2": 3.2067543268203735, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -233.41500854492188, "logps_train/policy_1_l": -242.216552734375, "logps_train/policy_1_w": -153.3858184814453, "logps_train/policy_2_2": -153.57369995117188, "logps_train/policy_2_w": -220.05352783203125, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -195.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": -1.0778298377990723, "rewards_train/1-l": -3.0035881996154785, "rewards_train/1-w": 3.768096685409546, "rewards_train/2-2": 4.186575412750244, "rewards_train/2-w": -0.5006651282310486, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.771684885025024, "rewards_train/margins_1": 4.845926523208618, "rewards_train/margins_2": 4.687240540981293, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -211.85647583007812, "logps_train/policy_1_l": -164.85113525390625, "logps_train/policy_1_w": -146.97100830078125, "logps_train/policy_2_2": -140.20187377929688, "logps_train/policy_2_w": -212.60391235351562, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.2778346538543701, "rewards_train/1-l": -2.431891441345215, "rewards_train/1-w": 3.7481637001037598, "rewards_train/2-2": 3.6751253604888916, "rewards_train/2-w": -0.9732801914215088, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.180055141448975, "rewards_train/margins_1": 5.02599835395813, "rewards_train/margins_2": 4.6484055519104, "step": 622 }, { "epoch": 1.86, "logps_train/policy_1_2": -152.0919189453125, "logps_train/policy_1_l": -78.89967346191406, "logps_train/policy_1_w": -83.17835998535156, "logps_train/policy_2_2": -96.93138122558594, "logps_train/policy_2_w": -140.38247680664062, "logps_train/ref_1_2": -136.0, "logps_train/ref_1_l": -63.25, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -121.5, "logps_train/ref_2_w": -127.0, "rewards_train/1-2": -1.595128059387207, "rewards_train/1-l": -1.5711684226989746, "rewards_train/1-w": 2.7781600952148438, "rewards_train/2-2": 2.4447519779205322, "rewards_train/2-w": -1.3528950214385986, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.349328517913818, "rewards_train/margins_1": 4.373288154602051, "rewards_train/margins_2": 3.797646999359131, "step": 622 }, { "epoch": 1.87, "logps_train/policy_1_2": -186.90084838867188, "logps_train/policy_1_l": -118.68110656738281, "logps_train/policy_1_w": -99.9234848022461, "logps_train/policy_2_2": -104.80158996582031, "logps_train/policy_2_w": -179.74887084960938, "logps_train/ref_1_2": -159.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -129.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -2.822312593460083, "rewards_train/1-l": -2.1666460037231445, "rewards_train/1-w": 2.8943703174591064, "rewards_train/2-2": 2.6954267024993896, "rewards_train/2-w": -2.2424654960632324, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.061016321182251, "rewards_train/margins_1": 5.7166829109191895, "rewards_train/margins_2": 4.937892198562622, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -218.31900024414062, "logps_train/policy_1_l": -171.75405883789062, "logps_train/policy_1_w": -128.35784912109375, "logps_train/policy_2_2": -140.36151123046875, "logps_train/policy_2_w": -201.63461303710938, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -181.0, "rewards_train/1-2": -2.1178367137908936, "rewards_train/1-l": -2.4830222129821777, "rewards_train/1-w": 2.9985904693603516, "rewards_train/2-2": 3.0560364723205566, "rewards_train/2-w": -2.027524471282959, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.481612682342529, "rewards_train/margins_1": 5.116427183151245, "rewards_train/margins_2": 5.083560943603516, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -193.48008728027344, "logps_train/policy_1_l": -204.8753204345703, "logps_train/policy_1_w": -128.7760009765625, "logps_train/policy_2_2": -131.53128051757812, "logps_train/policy_2_w": -191.1856689453125, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -177.0, "logps_train/ref_1_w": -156.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.3870714902877808, "rewards_train/1-l": -2.763117790222168, "rewards_train/1-w": 2.7198617458343506, "rewards_train/2-2": 2.5559537410736084, "rewards_train/2-w": -1.1623151302337646, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.4829795360565186, "rewards_train/margins_1": 4.106933236122131, "rewards_train/margins_2": 3.718268871307373, "step": 623 }, { "epoch": 1.87, "logps_train/policy_1_2": -137.72024536132812, "logps_train/policy_1_l": -141.56973266601562, "logps_train/policy_1_w": -71.33853149414062, "logps_train/policy_2_2": -92.85556030273438, "logps_train/policy_2_w": -112.72006225585938, "logps_train/ref_1_2": -126.0, "logps_train/ref_1_l": -121.0, "logps_train/ref_1_w": -89.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -102.5, "rewards_train/1-2": -1.1298367977142334, "rewards_train/1-l": -2.0419344902038574, "rewards_train/1-w": 1.7428075075149536, "rewards_train/2-2": 2.0679588317871094, "rewards_train/2-w": -1.0116548538208008, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.784741997718811, "rewards_train/margins_1": 2.872644305229187, "rewards_train/margins_2": 3.07961368560791, "step": 623 }, { "epoch": 1.87, "learning_rate": 6.426016523400553e-08, "loss": 0.5801, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -192.61813354492188, "logps_train/policy_1_l": -186.78298950195312, "logps_train/policy_1_w": -122.51576232910156, "logps_train/policy_2_2": -129.11807250976562, "logps_train/policy_2_w": -177.81573486328125, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -0.9120097756385803, "rewards_train/1-l": -2.2324509620666504, "rewards_train/1-w": 2.8890485763549805, "rewards_train/2-2": 3.021395444869995, "rewards_train/2-w": -0.8792291879653931, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.121499538421631, "rewards_train/margins_1": 3.801058351993561, "rewards_train/margins_2": 3.900624632835388, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -181.91551208496094, "logps_train/policy_1_l": -139.58145141601562, "logps_train/policy_1_w": -114.22885131835938, "logps_train/policy_2_2": -120.18264770507812, "logps_train/policy_2_w": -185.14903259277344, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -146.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.3083487749099731, "rewards_train/1-l": -2.0567779541015625, "rewards_train/1-w": 3.2009425163269043, "rewards_train/2-2": 2.6657190322875977, "rewards_train/2-w": -1.5219347476959229, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.257720470428467, "rewards_train/margins_1": 4.509291291236877, "rewards_train/margins_2": 4.1876537799835205, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -147.55567932128906, "logps_train/policy_1_l": -112.81065368652344, "logps_train/policy_1_w": -94.99594116210938, "logps_train/policy_2_2": -97.2818832397461, "logps_train/policy_2_w": -153.09104919433594, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -122.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -1.0145518779754639, "rewards_train/1-l": -1.8068469762802124, "rewards_train/1-w": 3.012515068054199, "rewards_train/2-2": 2.461069345474243, "rewards_train/2-w": -1.0548080205917358, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 4.819362044334412, "rewards_train/margins_1": 4.027066946029663, "rewards_train/margins_2": 3.515877366065979, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -165.3370361328125, "logps_train/policy_1_l": -172.794677734375, "logps_train/policy_1_w": -82.74284362792969, "logps_train/policy_2_2": -98.50951385498047, "logps_train/policy_2_w": -127.6839599609375, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -103.0, "logps_train/ref_2_2": -126.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -1.9174937009811401, "rewards_train/1-l": -2.4353270530700684, "rewards_train/1-w": 2.0324535369873047, "rewards_train/2-2": 2.7713146209716797, "rewards_train/2-w": -1.2619503736495972, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.467780590057373, "rewards_train/margins_1": 3.949947237968445, "rewards_train/margins_2": 4.033264994621277, "step": 624 }, { "epoch": 1.87, "logps_train/policy_1_2": -178.38995361328125, "logps_train/policy_1_l": -155.73431396484375, "logps_train/policy_1_w": -136.73236083984375, "logps_train/policy_2_2": -115.84201049804688, "logps_train/policy_2_w": -208.04620361328125, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -1.4495420455932617, "rewards_train/1-l": -2.026263475418091, "rewards_train/1-w": 2.877739191055298, "rewards_train/2-2": 2.7245872020721436, "rewards_train/2-w": -2.0971970558166504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.904002666473389, "rewards_train/margins_1": 4.32728123664856, "rewards_train/margins_2": 4.821784257888794, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -158.74276733398438, "logps_train/policy_1_l": -184.65591430664062, "logps_train/policy_1_w": -117.93606567382812, "logps_train/policy_2_2": -106.73245239257812, "logps_train/policy_2_w": -169.48960876464844, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -167.0, "rewards_train/1-2": -0.6672465801239014, "rewards_train/1-l": -2.350552558898926, "rewards_train/1-w": 2.9028778076171875, "rewards_train/2-2": 2.7349586486816406, "rewards_train/2-w": -0.23528949916362762, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.253430366516113, "rewards_train/margins_1": 3.570124387741089, "rewards_train/margins_2": 2.9702481478452682, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -274.43353271484375, "logps_train/policy_1_l": -239.77149963378906, "logps_train/policy_1_w": -157.32810974121094, "logps_train/policy_2_2": -172.47291564941406, "logps_train/policy_2_w": -253.61631774902344, "logps_train/ref_1_2": -253.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -217.0, "logps_train/ref_2_w": -230.0, "rewards_train/1-2": -2.165228843688965, "rewards_train/1-l": -3.941993236541748, "rewards_train/1-w": 3.6250014305114746, "rewards_train/2-2": 4.432395935058594, "rewards_train/2-w": -2.2936625480651855, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.566994667053223, "rewards_train/margins_1": 5.7902302742004395, "rewards_train/margins_2": 6.726058483123779, "step": 625 }, { "epoch": 1.87, "logps_train/policy_1_2": -181.96322631835938, "logps_train/policy_1_l": -157.44606018066406, "logps_train/policy_1_w": -89.88912200927734, "logps_train/policy_2_2": -107.48474884033203, "logps_train/policy_2_w": -169.54043579101562, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -2.3995814323425293, "rewards_train/1-l": -1.9177502393722534, "rewards_train/1-w": 3.039212226867676, "rewards_train/2-2": 2.572765350341797, "rewards_train/2-w": -2.531485080718994, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.956962466239929, "rewards_train/margins_1": 5.438793659210205, "rewards_train/margins_2": 5.104250431060791, "step": 625 }, { "epoch": 1.87, "learning_rate": 5.8815339582966316e-08, "loss": 0.5907, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -133.77166748046875, "logps_train/policy_1_l": -113.29591369628906, "logps_train/policy_1_w": -96.88175201416016, "logps_train/policy_2_2": -73.33214569091797, "logps_train/policy_2_w": -154.48565673828125, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.5193533897399902, "rewards_train/1-l": -1.8204118013381958, "rewards_train/1-w": 2.708699941635132, "rewards_train/2-2": 2.3527231216430664, "rewards_train/2-w": -1.4626282453536987, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.529111742973328, "rewards_train/margins_1": 4.228053331375122, "rewards_train/margins_2": 3.815351366996765, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -198.30026245117188, "logps_train/policy_1_l": -183.34283447265625, "logps_train/policy_1_w": -130.0269775390625, "logps_train/policy_2_2": -115.0535888671875, "logps_train/policy_2_w": -232.9645233154297, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -145.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -2.288618564605713, "rewards_train/1-l": -3.05537748336792, "rewards_train/1-w": 3.7871460914611816, "rewards_train/2-2": 3.0127556324005127, "rewards_train/2-w": -2.7933263778686523, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.842523574829102, "rewards_train/margins_1": 6.0757646560668945, "rewards_train/margins_2": 5.806082010269165, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -236.61456298828125, "logps_train/policy_1_l": -243.523193359375, "logps_train/policy_1_w": -140.28211975097656, "logps_train/policy_2_2": -146.20762634277344, "logps_train/policy_2_w": -225.02227783203125, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -2.5262980461120605, "rewards_train/1-l": -3.7137460708618164, "rewards_train/1-w": 3.0921008586883545, "rewards_train/2-2": 3.469863176345825, "rewards_train/2-w": -2.3022279739379883, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.805846929550171, "rewards_train/margins_1": 5.618398904800415, "rewards_train/margins_2": 5.7720911502838135, "step": 626 }, { "epoch": 1.87, "logps_train/policy_1_2": -161.22323608398438, "logps_train/policy_1_l": -193.8038330078125, "logps_train/policy_1_w": -137.0619354248047, "logps_train/policy_2_2": -107.74413299560547, "logps_train/policy_2_w": -207.97549438476562, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -167.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -0.9838461875915527, "rewards_train/1-l": -2.6662724018096924, "rewards_train/1-w": 3.306307315826416, "rewards_train/2-2": 2.721680164337158, "rewards_train/2-w": -1.329190731048584, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.972579717636108, "rewards_train/margins_1": 4.290153503417969, "rewards_train/margins_2": 4.050870895385742, "step": 626 }, { "epoch": 1.88, "logps_train/policy_1_2": -186.81674194335938, "logps_train/policy_1_l": -206.98623657226562, "logps_train/policy_1_w": -112.0928955078125, "logps_train/policy_2_2": -123.40679931640625, "logps_train/policy_2_w": -182.1033172607422, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.7597987055778503, "rewards_train/1-l": -3.0626871585845947, "rewards_train/1-w": 3.461803913116455, "rewards_train/2-2": 3.536663293838501, "rewards_train/2-w": -1.2798625230789185, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.52449107170105, "rewards_train/margins_1": 4.221602618694305, "rewards_train/margins_2": 4.816525816917419, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -90.49577331542969, "logps_train/policy_1_l": -109.70475769042969, "logps_train/policy_1_w": -87.28453063964844, "logps_train/policy_2_2": -56.468448638916016, "logps_train/policy_2_w": -136.81959533691406, "logps_train/ref_1_2": -81.5, "logps_train/ref_1_l": -81.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -69.5, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -0.9022562503814697, "rewards_train/1-l": -2.8903427124023438, "rewards_train/1-w": 3.6919145584106445, "rewards_train/2-2": 1.2898069620132446, "rewards_train/2-w": 0.22012168169021606, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 6.582257270812988, "rewards_train/margins_1": 4.594170808792114, "rewards_train/margins_2": 1.0696852803230286, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -227.42623901367188, "logps_train/policy_1_l": -194.09519958496094, "logps_train/policy_1_w": -164.80209350585938, "logps_train/policy_2_2": -151.4129638671875, "logps_train/policy_2_w": -250.24005126953125, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -231.0, "rewards_train/1-2": -1.5012174844741821, "rewards_train/1-l": -2.4396727085113525, "rewards_train/1-w": 3.7740888595581055, "rewards_train/2-2": 3.299523115158081, "rewards_train/2-w": -1.887286901473999, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.213761568069458, "rewards_train/margins_1": 5.275306344032288, "rewards_train/margins_2": 5.18681001663208, "step": 627 }, { "epoch": 1.88, "logps_train/policy_1_2": -144.91357421875, "logps_train/policy_1_l": -136.0411376953125, "logps_train/policy_1_w": -87.3305892944336, "logps_train/policy_2_2": -93.06654357910156, "logps_train/policy_2_w": -126.30902862548828, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -113.5, "logps_train/ref_1_w": -113.0, "logps_train/ref_2_2": -119.0, "logps_train/ref_2_w": -123.5, "rewards_train/1-2": -0.7130002379417419, "rewards_train/1-l": -2.239733934402466, "rewards_train/1-w": 2.5416481494903564, "rewards_train/2-2": 2.567112445831299, "rewards_train/2-w": -0.27133238315582275, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.781382083892822, "rewards_train/margins_1": 3.2546483874320984, "rewards_train/margins_2": 2.8384448289871216, "step": 627 }, { "epoch": 1.88, "learning_rate": 5.360876925123992e-08, "loss": 0.7723, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -204.87261962890625, "logps_train/policy_1_l": -208.46173095703125, "logps_train/policy_1_w": -137.5421142578125, "logps_train/policy_2_2": -140.13841247558594, "logps_train/policy_2_w": -201.95684814453125, "logps_train/ref_1_2": -187.0, "logps_train/ref_1_l": -178.0, "logps_train/ref_1_w": -164.0, "logps_train/ref_2_2": -166.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.8115296363830566, "rewards_train/1-l": -3.071563720703125, "rewards_train/1-w": 2.659899950027466, "rewards_train/2-2": 2.6269302368164062, "rewards_train/2-w": -1.664239764213562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.731463670730591, "rewards_train/margins_1": 4.4714295864105225, "rewards_train/margins_2": 4.291170001029968, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -195.54351806640625, "logps_train/policy_1_l": -185.42050170898438, "logps_train/policy_1_w": -129.92657470703125, "logps_train/policy_2_2": -124.67909240722656, "logps_train/policy_2_w": -196.04180908203125, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.3703683614730835, "rewards_train/1-l": -2.504549741744995, "rewards_train/1-w": 3.0815601348876953, "rewards_train/2-2": 3.3687117099761963, "rewards_train/2-w": -1.2659001350402832, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.58610987663269, "rewards_train/margins_1": 4.451928496360779, "rewards_train/margins_2": 4.6346118450164795, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -144.00575256347656, "logps_train/policy_1_l": -123.22547912597656, "logps_train/policy_1_w": -66.64779663085938, "logps_train/policy_2_2": -82.779052734375, "logps_train/policy_2_w": -112.52586364746094, "logps_train/ref_1_2": -124.5, "logps_train/ref_1_l": -101.5, "logps_train/ref_1_w": -85.0, "logps_train/ref_2_2": -107.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": -1.9518446922302246, "rewards_train/1-l": -2.1877377033233643, "rewards_train/1-w": 1.8525018692016602, "rewards_train/2-2": 2.4089112281799316, "rewards_train/2-w": -1.0408439636230469, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.040239572525024, "rewards_train/margins_1": 3.8043465614318848, "rewards_train/margins_2": 3.4497551918029785, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -187.9105224609375, "logps_train/policy_1_l": -196.53958129882812, "logps_train/policy_1_w": -122.22862243652344, "logps_train/policy_2_2": -111.66316986083984, "logps_train/policy_2_w": -206.26797485351562, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -2.0266003608703613, "rewards_train/1-l": -2.523001194000244, "rewards_train/1-w": 2.9540910720825195, "rewards_train/2-2": 2.8436927795410156, "rewards_train/2-w": -2.0182044506073, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.477092266082764, "rewards_train/margins_1": 4.980691432952881, "rewards_train/margins_2": 4.861897230148315, "step": 628 }, { "epoch": 1.88, "logps_train/policy_1_2": -229.97164916992188, "logps_train/policy_1_l": -225.7549285888672, "logps_train/policy_1_w": -130.89405822753906, "logps_train/policy_2_2": -148.94216918945312, "logps_train/policy_2_w": -204.35617065429688, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -191.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -193.0, "rewards_train/1-2": -1.8479456901550293, "rewards_train/1-l": -3.4649455547332764, "rewards_train/1-w": 3.0848135948181152, "rewards_train/2-2": 3.3710174560546875, "rewards_train/2-w": -1.1528053283691406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.549759149551392, "rewards_train/margins_1": 4.9327592849731445, "rewards_train/margins_2": 4.523822784423828, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -220.32095336914062, "logps_train/policy_1_l": -208.43792724609375, "logps_train/policy_1_w": -155.96002197265625, "logps_train/policy_2_2": -142.40463256835938, "logps_train/policy_2_w": -247.6226043701172, "logps_train/ref_1_2": -203.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -192.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": -1.751626968383789, "rewards_train/1-l": -2.7537546157836914, "rewards_train/1-w": 3.568059206008911, "rewards_train/2-2": 3.2573885917663574, "rewards_train/2-w": -2.0235886573791504, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.3218138217926025, "rewards_train/margins_1": 5.3196861743927, "rewards_train/margins_2": 5.280977249145508, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -155.45394897460938, "logps_train/policy_1_l": -116.37507629394531, "logps_train/policy_1_w": -98.12242126464844, "logps_train/policy_2_2": -100.54808044433594, "logps_train/policy_2_w": -156.65216064453125, "logps_train/ref_1_2": -146.0, "logps_train/ref_1_l": -91.5, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -150.0, "rewards_train/1-2": -0.9141446948051453, "rewards_train/1-l": -2.4666335582733154, "rewards_train/1-w": 2.8358049392700195, "rewards_train/2-2": 3.0553483963012695, "rewards_train/2-w": -0.6660939455032349, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.8125, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.302438497543335, "rewards_train/margins_1": 3.749949634075165, "rewards_train/margins_2": 3.7214423418045044, "step": 629 }, { "epoch": 1.88, "logps_train/policy_1_2": -197.16452026367188, "logps_train/policy_1_l": -189.67857360839844, "logps_train/policy_1_w": -136.25839233398438, "logps_train/policy_2_2": -130.5749053955078, "logps_train/policy_2_w": -205.07135009765625, "logps_train/ref_1_2": -182.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -1.5102025270462036, "rewards_train/1-l": -2.460961103439331, "rewards_train/1-w": 3.131582498550415, "rewards_train/2-2": 3.0204391479492188, "rewards_train/2-w": -1.427445888519287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.592543601989746, "rewards_train/margins_1": 4.641785025596619, "rewards_train/margins_2": 4.447885036468506, "step": 629 }, { "epoch": 1.89, "learning_rate": 4.864096239091287e-08, "loss": 0.5074, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -175.14039611816406, "logps_train/policy_1_l": -158.44305419921875, "logps_train/policy_1_w": -85.73193359375, "logps_train/policy_2_2": -112.18037414550781, "logps_train/policy_2_w": -140.25509643554688, "logps_train/ref_1_2": -158.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -111.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -1.6991956233978271, "rewards_train/1-l": -2.2643237113952637, "rewards_train/1-w": 2.495166301727295, "rewards_train/2-2": 2.7702436447143555, "rewards_train/2-w": -1.2739465236663818, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.759490013122559, "rewards_train/margins_1": 4.194361925125122, "rewards_train/margins_2": 4.044190168380737, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -155.8006591796875, "logps_train/policy_1_l": -139.55889892578125, "logps_train/policy_1_w": -87.74583435058594, "logps_train/policy_2_2": -107.10504150390625, "logps_train/policy_2_w": -140.10284423828125, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -119.5, "logps_train/ref_1_w": -117.0, "logps_train/ref_2_2": -131.0, "logps_train/ref_2_w": -136.0, "rewards_train/1-2": -1.0972532033920288, "rewards_train/1-l": -1.9872376918792725, "rewards_train/1-w": 2.9305927753448486, "rewards_train/2-2": 2.401604652404785, "rewards_train/2-w": -0.3705372214317322, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.917830467224121, "rewards_train/margins_1": 4.027845978736877, "rewards_train/margins_2": 2.7721418738365173, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -241.3707733154297, "logps_train/policy_1_l": -192.75222778320312, "logps_train/policy_1_w": -113.25827026367188, "logps_train/policy_2_2": -163.1671142578125, "logps_train/policy_2_w": -188.89276123046875, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -2.0706710815429688, "rewards_train/1-l": -2.359304189682007, "rewards_train/1-w": 2.9839389324188232, "rewards_train/2-2": 3.1250858306884766, "rewards_train/2-w": -1.8482595682144165, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.34324312210083, "rewards_train/margins_1": 5.054610013961792, "rewards_train/margins_2": 4.973345398902893, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -160.15347290039062, "logps_train/policy_1_l": -170.54046630859375, "logps_train/policy_1_w": -129.12815856933594, "logps_train/policy_2_2": -104.29637145996094, "logps_train/policy_2_w": -183.5801544189453, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.9751136898994446, "rewards_train/1-l": -1.9885672330856323, "rewards_train/1-w": 2.8481225967407227, "rewards_train/2-2": 2.8211445808410645, "rewards_train/2-w": -1.033015489578247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.836689829826355, "rewards_train/margins_1": 3.8232362866401672, "rewards_train/margins_2": 3.8541600704193115, "step": 630 }, { "epoch": 1.89, "logps_train/policy_1_2": -163.30905151367188, "logps_train/policy_1_l": -164.974365234375, "logps_train/policy_1_w": -101.17268371582031, "logps_train/policy_2_2": -109.66283416748047, "logps_train/policy_2_w": -160.78179931640625, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -125.5, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.3660616874694824, "rewards_train/1-l": -2.425097942352295, "rewards_train/1-w": 2.4538984298706055, "rewards_train/2-2": 2.251685380935669, "rewards_train/2-w": -1.306890606880188, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.8789963722229, "rewards_train/margins_1": 3.819960117340088, "rewards_train/margins_2": 3.558575987815857, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -227.80023193359375, "logps_train/policy_1_l": -165.63958740234375, "logps_train/policy_1_w": -128.0172576904297, "logps_train/policy_2_2": -137.91934204101562, "logps_train/policy_2_w": -216.53384399414062, "logps_train/ref_1_2": -198.0, "logps_train/ref_1_l": -142.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -169.0, "logps_train/ref_2_w": -187.0, "rewards_train/1-2": -3.005023241043091, "rewards_train/1-l": -2.3430604934692383, "rewards_train/1-w": 3.0115561485290527, "rewards_train/2-2": 3.0760843753814697, "rewards_train/2-w": -2.9659318923950195, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.354616641998291, "rewards_train/margins_1": 6.0165793895721436, "rewards_train/margins_2": 6.042016267776489, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -205.23385620117188, "logps_train/policy_1_l": -160.7471466064453, "logps_train/policy_1_w": -127.29438781738281, "logps_train/policy_2_2": -143.05841064453125, "logps_train/policy_2_w": -190.53048706054688, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.1144506931304932, "rewards_train/1-l": -1.780184268951416, "rewards_train/1-w": 3.458451509475708, "rewards_train/2-2": 3.1412034034729004, "rewards_train/2-w": -0.8530490398406982, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.238635778427124, "rewards_train/margins_1": 4.572902202606201, "rewards_train/margins_2": 3.9942524433135986, "step": 631 }, { "epoch": 1.89, "logps_train/policy_1_2": -218.1683349609375, "logps_train/policy_1_l": -164.7415771484375, "logps_train/policy_1_w": -94.72908020019531, "logps_train/policy_2_2": -140.97640991210938, "logps_train/policy_2_w": -169.2960662841797, "logps_train/ref_1_2": -200.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -175.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -1.848866581916809, "rewards_train/1-l": -2.09149169921875, "rewards_train/1-w": 2.8599047660827637, "rewards_train/2-2": 3.430485725402832, "rewards_train/2-w": -1.7417166233062744, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.951396465301514, "rewards_train/margins_1": 4.708771347999573, "rewards_train/margins_2": 5.1722023487091064, "step": 631 }, { "epoch": 1.89, "learning_rate": 4.391240385117623e-08, "loss": 0.5379, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -115.17542266845703, "logps_train/policy_1_l": -88.41136169433594, "logps_train/policy_1_w": -67.96864318847656, "logps_train/policy_2_2": -67.61388397216797, "logps_train/policy_2_w": -117.4354476928711, "logps_train/ref_1_2": -101.0, "logps_train/ref_1_l": -75.0, "logps_train/ref_1_w": -88.5, "logps_train/ref_2_2": -87.0, "logps_train/ref_2_w": -104.5, "rewards_train/1-2": -1.3909798860549927, "rewards_train/1-l": -1.3704330921173096, "rewards_train/1-w": 2.045713424682617, "rewards_train/2-2": 1.973059892654419, "rewards_train/2-w": -1.274794578552246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.4161465167999268, "rewards_train/margins_1": 3.43669331073761, "rewards_train/margins_2": 3.247854471206665, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -237.48532104492188, "logps_train/policy_1_l": -190.6907501220703, "logps_train/policy_1_w": -157.44271850585938, "logps_train/policy_2_2": -166.0681610107422, "logps_train/policy_2_w": -228.57388305664062, "logps_train/ref_1_2": -230.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -198.0, "logps_train/ref_2_2": -204.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -0.7336889505386353, "rewards_train/1-l": -1.8019369840621948, "rewards_train/1-w": 4.035807132720947, "rewards_train/2-2": 3.8095903396606445, "rewards_train/2-w": -0.6819980144500732, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.837744116783142, "rewards_train/margins_1": 4.7694960832595825, "rewards_train/margins_2": 4.491588354110718, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -187.00332641601562, "logps_train/policy_1_l": -193.9583740234375, "logps_train/policy_1_w": -126.66015625, "logps_train/policy_2_2": -121.0948715209961, "logps_train/policy_2_w": -200.9529266357422, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.4604884386062622, "rewards_train/1-l": -2.4974002838134766, "rewards_train/1-w": 3.519237995147705, "rewards_train/2-2": 3.4241065979003906, "rewards_train/2-w": -1.285527229309082, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.016638278961182, "rewards_train/margins_1": 4.979726433753967, "rewards_train/margins_2": 4.709633827209473, "step": 632 }, { "epoch": 1.89, "logps_train/policy_1_2": -145.13214111328125, "logps_train/policy_1_l": -159.16908264160156, "logps_train/policy_1_w": -126.11114501953125, "logps_train/policy_2_2": -86.6493148803711, "logps_train/policy_2_w": -194.17062377929688, "logps_train/ref_1_2": -132.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -110.5, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -1.272979736328125, "rewards_train/1-l": -1.8198376893997192, "rewards_train/1-w": 2.8902528285980225, "rewards_train/2-2": 2.369443416595459, "rewards_train/2-w": -1.7379610538482666, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.710090517997742, "rewards_train/margins_1": 4.1632325649261475, "rewards_train/margins_2": 4.107404470443726, "step": 632 }, { "epoch": 1.9, "logps_train/policy_1_2": -150.5757293701172, "logps_train/policy_1_l": -109.90373992919922, "logps_train/policy_1_w": -99.64204406738281, "logps_train/policy_2_2": -88.354248046875, "logps_train/policy_2_w": -176.63877868652344, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -92.0, "logps_train/ref_1_w": -131.0, "logps_train/ref_2_2": -111.5, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -2.109574556350708, "rewards_train/1-l": -1.8053641319274902, "rewards_train/1-w": 3.1063032150268555, "rewards_train/2-2": 2.3251705169677734, "rewards_train/2-w": -1.9712998867034912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.911667346954346, "rewards_train/margins_1": 5.2158777713775635, "rewards_train/margins_2": 4.296470403671265, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -167.8336639404297, "logps_train/policy_1_l": -163.99278259277344, "logps_train/policy_1_w": -129.6538848876953, "logps_train/policy_2_2": -98.38371276855469, "logps_train/policy_2_w": -198.69386291503906, "logps_train/ref_1_2": -145.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -160.0, "logps_train/ref_2_2": -120.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -2.3103203773498535, "rewards_train/1-l": -2.3222756385803223, "rewards_train/1-w": 3.049259662628174, "rewards_train/2-2": 2.177058696746826, "rewards_train/2-w": -1.4801299571990967, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.371535301208496, "rewards_train/margins_1": 5.359580039978027, "rewards_train/margins_2": 3.657188653945923, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -196.5697479248047, "logps_train/policy_1_l": -171.56228637695312, "logps_train/policy_1_w": -111.51262664794922, "logps_train/policy_2_2": -129.62631225585938, "logps_train/policy_2_w": -175.50076293945312, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -162.0, "rewards_train/1-2": -1.3757247924804688, "rewards_train/1-l": -2.239431619644165, "rewards_train/1-w": 2.843658685684204, "rewards_train/2-2": 2.8272125720977783, "rewards_train/2-w": -1.3438259363174438, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.083090305328369, "rewards_train/margins_1": 4.219383478164673, "rewards_train/margins_2": 4.171038508415222, "step": 633 }, { "epoch": 1.9, "logps_train/policy_1_2": -190.1703643798828, "logps_train/policy_1_l": -151.2101593017578, "logps_train/policy_1_w": -109.85420227050781, "logps_train/policy_2_2": -121.27090454101562, "logps_train/policy_2_w": -172.73825073242188, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -142.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -1.4451611042022705, "rewards_train/1-l": -2.0411338806152344, "rewards_train/1-w": 3.1964166164398193, "rewards_train/2-2": 3.4744720458984375, "rewards_train/2-w": -0.9911109805107117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.237550497055054, "rewards_train/margins_1": 4.64157772064209, "rewards_train/margins_2": 4.465583026409149, "step": 633 }, { "epoch": 1.9, "learning_rate": 3.942355513100793e-08, "loss": 0.514, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -148.97613525390625, "logps_train/policy_1_l": -125.35913848876953, "logps_train/policy_1_w": -93.53621673583984, "logps_train/policy_2_2": -98.80290222167969, "logps_train/policy_2_w": -145.0671844482422, "logps_train/ref_1_2": -138.0, "logps_train/ref_1_l": -109.0, "logps_train/ref_1_w": -119.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -134.0, "rewards_train/1-2": -1.0784733295440674, "rewards_train/1-l": -1.6544201374053955, "rewards_train/1-w": 2.5751376152038574, "rewards_train/2-2": 2.451350212097168, "rewards_train/2-w": -1.08694326877594, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.229557752609253, "rewards_train/margins_1": 3.653610944747925, "rewards_train/margins_2": 3.538293480873108, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -168.05809020996094, "logps_train/policy_1_l": -170.79649353027344, "logps_train/policy_1_w": -84.82432556152344, "logps_train/policy_2_2": -115.83091735839844, "logps_train/policy_2_w": -135.23419189453125, "logps_train/ref_1_2": -155.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -108.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -1.2787580490112305, "rewards_train/1-l": -3.0479116439819336, "rewards_train/1-w": 2.3201065063476562, "rewards_train/2-2": 2.365687370300293, "rewards_train/2-w": -0.9605274200439453, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.36801815032959, "rewards_train/margins_1": 3.5988645553588867, "rewards_train/margins_2": 3.3262147903442383, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -212.00115966796875, "logps_train/policy_1_l": -224.013427734375, "logps_train/policy_1_w": -169.285888671875, "logps_train/policy_2_2": -137.89041137695312, "logps_train/policy_2_w": -245.73890686035156, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -196.0, "logps_train/ref_1_w": -202.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.887616515159607, "rewards_train/1-l": -2.8631601333618164, "rewards_train/1-w": 3.263599395751953, "rewards_train/2-2": 2.9443585872650146, "rewards_train/2-w": -1.9840465784072876, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.1267595291137695, "rewards_train/margins_1": 5.15121591091156, "rewards_train/margins_2": 4.928405165672302, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -187.93544006347656, "logps_train/policy_1_l": -178.15655517578125, "logps_train/policy_1_w": -123.0899887084961, "logps_train/policy_2_2": -119.24789428710938, "logps_train/policy_2_w": -186.27078247070312, "logps_train/ref_1_2": -171.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -155.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.7448135614395142, "rewards_train/1-l": -2.440556764602661, "rewards_train/1-w": 3.192661762237549, "rewards_train/2-2": 2.786050319671631, "rewards_train/2-w": -1.4548370838165283, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.63321852684021, "rewards_train/margins_1": 4.937475323677063, "rewards_train/margins_2": 4.240887403488159, "step": 634 }, { "epoch": 1.9, "logps_train/policy_1_2": -194.19668579101562, "logps_train/policy_1_l": -243.78392028808594, "logps_train/policy_1_w": -150.12359619140625, "logps_train/policy_2_2": -124.83116149902344, "logps_train/policy_2_w": -236.17660522460938, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -215.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -220.0, "rewards_train/1-2": -1.328263521194458, "rewards_train/1-l": -2.845506191253662, "rewards_train/1-w": 3.555609703063965, "rewards_train/2-2": 3.069422960281372, "rewards_train/2-w": -1.6168792247772217, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.401115894317627, "rewards_train/margins_1": 4.883873224258423, "rewards_train/margins_2": 4.686302185058594, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -155.83433532714844, "logps_train/policy_1_l": -144.78286743164062, "logps_train/policy_1_w": -119.08436584472656, "logps_train/policy_2_2": -110.10154724121094, "logps_train/policy_2_w": -169.02789306640625, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -127.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -163.0, "rewards_train/1-2": -0.26976296305656433, "rewards_train/1-l": -1.8200337886810303, "rewards_train/1-w": 2.996250629425049, "rewards_train/2-2": 3.0449230670928955, "rewards_train/2-w": -0.6020079255104065, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.816284418106079, "rewards_train/margins_1": 3.266013592481613, "rewards_train/margins_2": 3.646930992603302, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -167.71116638183594, "logps_train/policy_1_l": -109.1501235961914, "logps_train/policy_1_w": -98.06488037109375, "logps_train/policy_2_2": -101.55870056152344, "logps_train/policy_2_w": -160.4488067626953, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -95.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -128.0, "logps_train/ref_2_w": -139.0, "rewards_train/1-2": -2.0249009132385254, "rewards_train/1-l": -1.4473117589950562, "rewards_train/1-w": 2.7018373012542725, "rewards_train/2-2": 2.632754325866699, "rewards_train/2-w": -2.1617746353149414, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.149149060249329, "rewards_train/margins_1": 4.726738214492798, "rewards_train/margins_2": 4.794528961181641, "step": 635 }, { "epoch": 1.9, "logps_train/policy_1_2": -204.59738159179688, "logps_train/policy_1_l": -155.5112762451172, "logps_train/policy_1_w": -114.96277618408203, "logps_train/policy_2_2": -134.5139923095703, "logps_train/policy_2_w": -191.70668029785156, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -137.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -2.089620590209961, "rewards_train/1-l": -1.83940851688385, "rewards_train/1-w": 3.547863245010376, "rewards_train/2-2": 2.5226240158081055, "rewards_train/2-w": -1.529261827468872, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.75, "rewards_train/margins": 5.387271761894226, "rewards_train/margins_1": 5.637483835220337, "rewards_train/margins_2": 4.0518858432769775, "step": 635 }, { "epoch": 1.9, "learning_rate": 3.517485433412987e-08, "loss": 0.6567, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -176.80386352539062, "logps_train/policy_1_l": -146.21498107910156, "logps_train/policy_1_w": -75.79707336425781, "logps_train/policy_2_2": -108.81258392333984, "logps_train/policy_2_w": -134.4823455810547, "logps_train/ref_1_2": -152.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -117.5, "rewards_train/1-2": -2.498354911804199, "rewards_train/1-l": -2.2318496704101562, "rewards_train/1-w": 2.011357307434082, "rewards_train/2-2": 2.13749098777771, "rewards_train/2-w": -1.7048749923706055, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.243206977844238, "rewards_train/margins_1": 4.509712219238281, "rewards_train/margins_2": 3.8423659801483154, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -216.32676696777344, "logps_train/policy_1_l": -238.88861083984375, "logps_train/policy_1_w": -158.5745086669922, "logps_train/policy_2_2": -135.18695068359375, "logps_train/policy_2_w": -244.800537109375, "logps_train/ref_1_2": -197.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -197.0, "logps_train/ref_2_2": -167.0, "logps_train/ref_2_w": -224.0, "rewards_train/1-2": -1.9240835905075073, "rewards_train/1-l": -3.511030435562134, "rewards_train/1-w": 3.7917685508728027, "rewards_train/2-2": 3.158257484436035, "rewards_train/2-w": -2.1238021850585938, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.3027989864349365, "rewards_train/margins_1": 5.71585214138031, "rewards_train/margins_2": 5.282059669494629, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -215.2845916748047, "logps_train/policy_1_l": -231.06863403320312, "logps_train/policy_1_w": -175.25668334960938, "logps_train/policy_2_2": -155.72573852539062, "logps_train/policy_2_w": -253.21133422851562, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -212.0, "logps_train/ref_1_w": -213.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -241.0, "rewards_train/1-2": -0.7211837768554688, "rewards_train/1-l": -1.9266875982284546, "rewards_train/1-w": 3.7813639640808105, "rewards_train/2-2": 3.397615909576416, "rewards_train/2-w": -1.1945700645446777, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.708051562309265, "rewards_train/margins_1": 4.502547740936279, "rewards_train/margins_2": 4.592185974121094, "step": 636 }, { "epoch": 1.9, "logps_train/policy_1_2": -124.09365844726562, "logps_train/policy_1_l": -149.71414184570312, "logps_train/policy_1_w": -117.92091369628906, "logps_train/policy_2_2": -81.18693542480469, "logps_train/policy_2_w": -192.0454864501953, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.1425198316574097, "rewards_train/1-l": -1.7040318250656128, "rewards_train/1-w": 2.979588031768799, "rewards_train/2-2": 1.8373124599456787, "rewards_train/2-w": -2.337947130203247, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.683619856834412, "rewards_train/margins_1": 4.1221078634262085, "rewards_train/margins_2": 4.175259590148926, "step": 636 }, { "epoch": 1.91, "logps_train/policy_1_2": -229.85971069335938, "logps_train/policy_1_l": -156.2166748046875, "logps_train/policy_1_w": -129.8866424560547, "logps_train/policy_2_2": -142.14268493652344, "logps_train/policy_2_w": -200.39443969726562, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -2.2273788452148438, "rewards_train/1-l": -1.5624182224273682, "rewards_train/1-w": 3.0922656059265137, "rewards_train/2-2": 3.693544387817383, "rewards_train/2-w": -1.4707093238830566, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.654683828353882, "rewards_train/margins_1": 5.319644451141357, "rewards_train/margins_2": 5.1642537117004395, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -185.19338989257812, "logps_train/policy_1_l": -205.22874450683594, "logps_train/policy_1_w": -128.31234741210938, "logps_train/policy_2_2": -121.58895874023438, "logps_train/policy_2_w": -201.84857177734375, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -180.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -1.0666046142578125, "rewards_train/1-l": -2.5692124366760254, "rewards_train/1-w": 3.3416171073913574, "rewards_train/2-2": 2.9786038398742676, "rewards_train/2-w": -1.37001371383667, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.910829544067383, "rewards_train/margins_1": 4.40822172164917, "rewards_train/margins_2": 4.3486175537109375, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -193.21432495117188, "logps_train/policy_1_l": -132.0843963623047, "logps_train/policy_1_w": -112.08426666259766, "logps_train/policy_2_2": -133.05833435058594, "logps_train/policy_2_w": -167.34127807617188, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.9648760557174683, "rewards_train/1-l": -2.1230149269104004, "rewards_train/1-w": 2.918989658355713, "rewards_train/2-2": 2.458890438079834, "rewards_train/2-w": -0.7898406982421875, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.042004585266113, "rewards_train/margins_1": 4.883865714073181, "rewards_train/margins_2": 3.2487311363220215, "step": 637 }, { "epoch": 1.91, "logps_train/policy_1_2": -200.25680541992188, "logps_train/policy_1_l": -180.07894897460938, "logps_train/policy_1_w": -133.31307983398438, "logps_train/policy_2_2": -121.54249572753906, "logps_train/policy_2_w": -191.7407989501953, "logps_train/ref_1_2": -180.0, "logps_train/ref_1_l": -151.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -2.052633285522461, "rewards_train/1-l": -2.856332302093506, "rewards_train/1-w": 2.7563862800598145, "rewards_train/2-2": 3.187156915664673, "rewards_train/2-w": -1.116852879524231, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.61271858215332, "rewards_train/margins_1": 4.809019565582275, "rewards_train/margins_2": 4.304009795188904, "step": 637 }, { "epoch": 1.91, "learning_rate": 3.1166716126249664e-08, "loss": 0.5811, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -215.20770263671875, "logps_train/policy_1_l": -150.52706909179688, "logps_train/policy_1_w": -105.42975616455078, "logps_train/policy_2_2": -136.3404541015625, "logps_train/policy_2_w": -164.24661254882812, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -153.0, "rewards_train/1-2": -2.20255708694458, "rewards_train/1-l": -1.9102931022644043, "rewards_train/1-w": 2.6988039016723633, "rewards_train/2-2": 3.150378704071045, "rewards_train/2-w": -1.1160664558410645, "rewards_train/accuracies": 0.875, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.609097003936768, "rewards_train/margins_1": 4.901360988616943, "rewards_train/margins_2": 4.266445159912109, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -183.15353393554688, "logps_train/policy_1_l": -179.143310546875, "logps_train/policy_1_w": -115.93411254882812, "logps_train/policy_2_2": -123.15601348876953, "logps_train/policy_2_w": -189.90994262695312, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.2833220958709717, "rewards_train/1-l": -2.681321859359741, "rewards_train/1-w": 3.0761194229125977, "rewards_train/2-2": 2.6105706691741943, "rewards_train/2-w": -1.5441184043884277, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.757441282272339, "rewards_train/margins_1": 4.359441518783569, "rewards_train/margins_2": 4.154689073562622, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -176.3164825439453, "logps_train/policy_1_l": -159.8753204345703, "logps_train/policy_1_w": -108.0616455078125, "logps_train/policy_2_2": -116.42317199707031, "logps_train/policy_2_w": -174.7748260498047, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -136.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.4013735055923462, "rewards_train/1-l": -2.3873367309570312, "rewards_train/1-w": 2.8579964637756348, "rewards_train/2-2": 2.4581704139709473, "rewards_train/2-w": -1.7792401313781738, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.245333194732666, "rewards_train/margins_1": 4.259369969367981, "rewards_train/margins_2": 4.237410545349121, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -201.57070922851562, "logps_train/policy_1_l": -204.2456512451172, "logps_train/policy_1_w": -119.39368438720703, "logps_train/policy_2_2": -131.11978149414062, "logps_train/policy_2_w": -186.70706176757812, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -171.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.6555088758468628, "rewards_train/1-l": -3.3468098640441895, "rewards_train/1-w": 2.8879752159118652, "rewards_train/2-2": 2.869662284851074, "rewards_train/2-w": -1.6414096355438232, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.234785079956055, "rewards_train/margins_1": 4.543484091758728, "rewards_train/margins_2": 4.5110719203948975, "step": 638 }, { "epoch": 1.91, "logps_train/policy_1_2": -184.44412231445312, "logps_train/policy_1_l": -129.8791046142578, "logps_train/policy_1_w": -125.93330383300781, "logps_train/policy_2_2": -130.6073760986328, "logps_train/policy_2_w": -180.21153259277344, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -112.5, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -156.0, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.0885533094406128, "rewards_train/1-l": -1.7431845664978027, "rewards_train/1-w": 2.7336227893829346, "rewards_train/2-2": 2.5650436878204346, "rewards_train/2-w": -0.8133409023284912, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.476807355880737, "rewards_train/margins_1": 3.8221760988235474, "rewards_train/margins_2": 3.378384590148926, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -188.80618286132812, "logps_train/policy_1_l": -112.15544128417969, "logps_train/policy_1_w": -95.16114807128906, "logps_train/policy_2_2": -117.72325897216797, "logps_train/policy_2_w": -162.3223876953125, "logps_train/ref_1_2": -170.0, "logps_train/ref_1_l": -97.0, "logps_train/ref_1_w": -123.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -1.9373819828033447, "rewards_train/1-l": -1.5078294277191162, "rewards_train/1-w": 2.777122974395752, "rewards_train/2-2": 2.8887581825256348, "rewards_train/2-w": -1.445373296737671, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.284952402114868, "rewards_train/margins_1": 4.714504957199097, "rewards_train/margins_2": 4.334131479263306, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -184.83309936523438, "logps_train/policy_1_l": -109.9257583618164, "logps_train/policy_1_w": -81.87879943847656, "logps_train/policy_2_2": -121.67647552490234, "logps_train/policy_2_w": -134.60516357421875, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -94.5, "logps_train/ref_1_w": -105.0, "logps_train/ref_2_2": -148.0, "logps_train/ref_2_w": -127.5, "rewards_train/1-2": -1.582918643951416, "rewards_train/1-l": -1.5590310096740723, "rewards_train/1-w": 2.3242292404174805, "rewards_train/2-2": 2.6734652519226074, "rewards_train/2-w": -0.7073914408683777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.8832602500915527, "rewards_train/margins_1": 3.9071478843688965, "rewards_train/margins_2": 3.380856692790985, "step": 639 }, { "epoch": 1.91, "logps_train/policy_1_2": -185.3014373779297, "logps_train/policy_1_l": -166.68756103515625, "logps_train/policy_1_w": -90.18258666992188, "logps_train/policy_2_2": -116.57111358642578, "logps_train/policy_2_w": -153.94613647460938, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.9122235774993896, "rewards_train/1-l": -2.3228960037231445, "rewards_train/1-w": 2.433657646179199, "rewards_train/2-2": 2.5759689807891846, "rewards_train/2-w": -1.4077985286712646, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.756553649902344, "rewards_train/margins_1": 4.345881223678589, "rewards_train/margins_2": 3.983767509460449, "step": 639 }, { "epoch": 1.92, "learning_rate": 2.739953169458992e-08, "loss": 0.587, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -252.45408630371094, "logps_train/policy_1_l": -205.61099243164062, "logps_train/policy_1_w": -158.59727478027344, "logps_train/policy_2_2": -169.02899169921875, "logps_train/policy_2_w": -239.76483154296875, "logps_train/ref_1_2": -234.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -206.0, "logps_train/ref_2_w": -218.0, "rewards_train/1-2": -1.9106433391571045, "rewards_train/1-l": -2.302114725112915, "rewards_train/1-w": 3.4730842113494873, "rewards_train/2-2": 3.6478822231292725, "rewards_train/2-w": -2.1444509029388428, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.775198936462402, "rewards_train/margins_1": 5.383727550506592, "rewards_train/margins_2": 5.792333126068115, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -132.09974670410156, "logps_train/policy_1_l": -103.9238510131836, "logps_train/policy_1_w": -76.19371032714844, "logps_train/policy_2_2": -80.39090728759766, "logps_train/policy_2_w": -131.4158172607422, "logps_train/ref_1_2": -118.5, "logps_train/ref_1_l": -82.5, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -100.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": -1.3755989074707031, "rewards_train/1-l": -2.1371984481811523, "rewards_train/1-w": 2.548598289489746, "rewards_train/2-2": 1.955538272857666, "rewards_train/2-w": -1.5056438446044922, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.685796737670898, "rewards_train/margins_1": 3.924197196960449, "rewards_train/margins_2": 3.461182117462158, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -199.32852172851562, "logps_train/policy_1_l": -186.62551879882812, "logps_train/policy_1_w": -155.66836547851562, "logps_train/policy_2_2": -128.9626007080078, "logps_train/policy_2_w": -246.66976928710938, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -161.0, "logps_train/ref_1_w": -196.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -1.4726953506469727, "rewards_train/1-l": -2.5879924297332764, "rewards_train/1-w": 4.028475284576416, "rewards_train/2-2": 2.9658493995666504, "rewards_train/2-w": -2.420102119445801, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.616467714309692, "rewards_train/margins_1": 5.501170635223389, "rewards_train/margins_2": 5.385951519012451, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -181.47750854492188, "logps_train/policy_1_l": -162.02285766601562, "logps_train/policy_1_w": -141.68902587890625, "logps_train/policy_2_2": -119.64988708496094, "logps_train/policy_2_w": -220.09210205078125, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.4274389743804932, "rewards_train/1-l": -2.106973886489868, "rewards_train/1-w": 3.291839838027954, "rewards_train/2-2": 2.7730965614318848, "rewards_train/2-w": -2.2263994216918945, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.398813724517822, "rewards_train/margins_1": 4.719278812408447, "rewards_train/margins_2": 4.999495983123779, "step": 640 }, { "epoch": 1.92, "logps_train/policy_1_2": -154.94100952148438, "logps_train/policy_1_l": -162.45201110839844, "logps_train/policy_1_w": -118.44078063964844, "logps_train/policy_2_2": -103.61531829833984, "logps_train/policy_2_w": -182.0152587890625, "logps_train/ref_1_2": -149.0, "logps_train/ref_1_l": -138.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -0.5890226364135742, "rewards_train/1-l": -2.4293324947357178, "rewards_train/1-w": 3.050452947616577, "rewards_train/2-2": 2.887418508529663, "rewards_train/2-w": -0.872423529624939, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.479785442352295, "rewards_train/margins_1": 3.6394755840301514, "rewards_train/margins_2": 3.759842038154602, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -244.45318603515625, "logps_train/policy_1_l": -230.856201171875, "logps_train/policy_1_w": -124.43283081054688, "logps_train/policy_2_2": -156.5011749267578, "logps_train/policy_2_w": -211.3946990966797, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -200.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -188.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -2.2875046730041504, "rewards_train/1-l": -3.0864007472991943, "rewards_train/1-w": 2.9598419666290283, "rewards_train/2-2": 3.178008556365967, "rewards_train/2-w": -2.036345958709717, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.046242713928223, "rewards_train/margins_1": 5.247346639633179, "rewards_train/margins_2": 5.214354515075684, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -188.464111328125, "logps_train/policy_1_l": -196.0904083251953, "logps_train/policy_1_w": -133.04104614257812, "logps_train/policy_2_2": -120.05097198486328, "logps_train/policy_2_w": -214.93670654296875, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -173.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -195.0, "rewards_train/1-2": -1.554223895072937, "rewards_train/1-l": -2.290681838989258, "rewards_train/1-w": 3.30605149269104, "rewards_train/2-2": 2.607012987136841, "rewards_train/2-w": -1.9885940551757812, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.596733331680298, "rewards_train/margins_1": 4.860275387763977, "rewards_train/margins_2": 4.595607042312622, "step": 641 }, { "epoch": 1.92, "logps_train/policy_1_2": -130.8665771484375, "logps_train/policy_1_l": -94.4398193359375, "logps_train/policy_1_w": -71.35569763183594, "logps_train/policy_2_2": -73.01456451416016, "logps_train/policy_2_w": -117.67303466796875, "logps_train/ref_1_2": -114.0, "logps_train/ref_1_l": -80.5, "logps_train/ref_1_w": -91.0, "logps_train/ref_2_2": -97.0, "logps_train/ref_2_w": -109.5, "rewards_train/1-2": -1.6895883083343506, "rewards_train/1-l": -1.3788697719573975, "rewards_train/1-w": 1.9695087671279907, "rewards_train/2-2": 2.3991291522979736, "rewards_train/2-w": -0.8225764036178589, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.348378539085388, "rewards_train/margins_1": 3.6590970754623413, "rewards_train/margins_2": 3.2217055559158325, "step": 641 }, { "epoch": 1.92, "learning_rate": 2.387366870971103e-08, "loss": 0.5435, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -179.878173828125, "logps_train/policy_1_l": -154.19052124023438, "logps_train/policy_1_w": -102.39131164550781, "logps_train/policy_2_2": -114.11298370361328, "logps_train/policy_2_w": -150.9739990234375, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -143.0, "rewards_train/1-2": -1.7350832223892212, "rewards_train/1-l": -2.0453219413757324, "rewards_train/1-w": 2.2740530967712402, "rewards_train/2-2": 2.6957335472106934, "rewards_train/2-w": -0.8173210620880127, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.319375038146973, "rewards_train/margins_1": 4.009136319160461, "rewards_train/margins_2": 3.513054609298706, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -201.436279296875, "logps_train/policy_1_l": -171.64459228515625, "logps_train/policy_1_w": -131.77249145507812, "logps_train/policy_2_2": -140.59230041503906, "logps_train/policy_2_w": -183.13702392578125, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -153.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -173.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -1.0227303504943848, "rewards_train/1-l": -1.895587682723999, "rewards_train/1-w": 2.660349130630493, "rewards_train/2-2": 3.21694278717041, "rewards_train/2-w": -0.6793279647827148, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.555936813354492, "rewards_train/margins_1": 3.683079481124878, "rewards_train/margins_2": 3.896270751953125, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -241.66546630859375, "logps_train/policy_1_l": -193.3682403564453, "logps_train/policy_1_w": -149.1383819580078, "logps_train/policy_2_2": -159.62747192382812, "logps_train/policy_2_w": -219.83639526367188, "logps_train/ref_1_2": -223.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -193.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -1.8243587017059326, "rewards_train/1-l": -2.7189536094665527, "rewards_train/1-w": 3.2152628898620605, "rewards_train/2-2": 3.34428334236145, "rewards_train/2-w": -1.2687956094741821, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.934216499328613, "rewards_train/margins_1": 5.039621591567993, "rewards_train/margins_2": 4.613078951835632, "step": 642 }, { "epoch": 1.92, "logps_train/policy_1_2": -246.35516357421875, "logps_train/policy_1_l": -203.49905395507812, "logps_train/policy_1_w": -176.6227569580078, "logps_train/policy_2_2": -167.46609497070312, "logps_train/policy_2_w": -254.09713745117188, "logps_train/ref_1_2": -226.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -214.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -235.0, "rewards_train/1-2": -2.051143169403076, "rewards_train/1-l": -2.195802688598633, "rewards_train/1-w": 3.6951470375061035, "rewards_train/2-2": 3.4303441047668457, "rewards_train/2-w": -1.9378407001495361, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.890949726104736, "rewards_train/margins_1": 5.74629020690918, "rewards_train/margins_2": 5.368184804916382, "step": 642 }, { "epoch": 1.93, "logps_train/policy_1_2": -220.58905029296875, "logps_train/policy_1_l": -213.8255615234375, "logps_train/policy_1_w": -145.44464111328125, "logps_train/policy_2_2": -140.3489227294922, "logps_train/policy_2_w": -230.80213928222656, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -208.0, "rewards_train/1-2": -2.1412792205810547, "rewards_train/1-l": -1.9737675189971924, "rewards_train/1-w": 2.836540460586548, "rewards_train/2-2": 2.840010166168213, "rewards_train/2-w": -2.2214252948760986, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.81030797958374, "rewards_train/margins_1": 4.9778196811676025, "rewards_train/margins_2": 5.0614354610443115, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -174.03805541992188, "logps_train/policy_1_l": -173.31060791015625, "logps_train/policy_1_w": -123.72357177734375, "logps_train/policy_2_2": -107.56007385253906, "logps_train/policy_2_w": -192.55703735351562, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -179.0, "rewards_train/1-2": -1.4353535175323486, "rewards_train/1-l": -1.8892961740493774, "rewards_train/1-w": 2.80698823928833, "rewards_train/2-2": 2.874150514602661, "rewards_train/2-w": -1.3770906925201416, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.6962844133377075, "rewards_train/margins_1": 4.242341756820679, "rewards_train/margins_2": 4.251241207122803, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -221.60482788085938, "logps_train/policy_1_l": -192.26422119140625, "logps_train/policy_1_w": -141.47952270507812, "logps_train/policy_2_2": -148.67076110839844, "logps_train/policy_2_w": -220.38674926757812, "logps_train/ref_1_2": -206.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -174.0, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.6130213737487793, "rewards_train/1-l": -2.3274948596954346, "rewards_train/1-w": 3.2110326290130615, "rewards_train/2-2": 3.4454245567321777, "rewards_train/2-w": -1.8285186290740967, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.538527488708496, "rewards_train/margins_1": 4.824054002761841, "rewards_train/margins_2": 5.273943185806274, "step": 643 }, { "epoch": 1.93, "logps_train/policy_1_2": -104.36575317382812, "logps_train/policy_1_l": -100.21711730957031, "logps_train/policy_1_w": -78.6553955078125, "logps_train/policy_2_2": -63.24067306518555, "logps_train/policy_2_w": -145.45327758789062, "logps_train/ref_1_2": -91.5, "logps_train/ref_1_l": -85.0, "logps_train/ref_1_w": -101.5, "logps_train/ref_2_2": -79.0, "logps_train/ref_2_w": -123.0, "rewards_train/1-2": -1.2793490886688232, "rewards_train/1-l": -1.5382893085479736, "rewards_train/1-w": 2.317078113555908, "rewards_train/2-2": 1.5839403867721558, "rewards_train/2-w": -2.264859676361084, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.855367422103882, "rewards_train/margins_1": 3.5964272022247314, "rewards_train/margins_2": 3.8488000631332397, "step": 643 }, { "epoch": 1.93, "learning_rate": 2.0589471289624018e-08, "loss": 0.5455, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -227.74046325683594, "logps_train/policy_1_l": -211.96231079101562, "logps_train/policy_1_w": -125.49333190917969, "logps_train/policy_2_2": -146.1488037109375, "logps_train/policy_2_w": -205.95907592773438, "logps_train/ref_1_2": -208.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -176.0, "logps_train/ref_2_w": -189.0, "rewards_train/1-2": -2.0162353515625, "rewards_train/1-l": -3.0278713703155518, "rewards_train/1-w": 3.2854318618774414, "rewards_train/2-2": 2.9929332733154297, "rewards_train/2-w": -1.7220792770385742, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.313303232192993, "rewards_train/margins_1": 5.301667213439941, "rewards_train/margins_2": 4.715012550354004, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -159.0386962890625, "logps_train/policy_1_l": -223.0248260498047, "logps_train/policy_1_w": -111.48092651367188, "logps_train/policy_2_2": -106.88910675048828, "logps_train/policy_2_w": -166.99234008789062, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -135.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.8421513438224792, "rewards_train/1-l": -3.439983367919922, "rewards_train/1-w": 2.360501289367676, "rewards_train/2-2": 2.621636390686035, "rewards_train/2-w": -1.0191550254821777, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.75, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.800484657287598, "rewards_train/margins_1": 3.202652633190155, "rewards_train/margins_2": 3.640791416168213, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -198.74261474609375, "logps_train/policy_1_l": -184.9329833984375, "logps_train/policy_1_w": -159.82122802734375, "logps_train/policy_2_2": -127.4204330444336, "logps_train/policy_2_w": -225.11572265625, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -164.0, "logps_train/ref_1_w": -191.0, "logps_train/ref_2_2": -159.0, "logps_train/ref_2_w": -211.0, "rewards_train/1-2": -1.79887056350708, "rewards_train/1-l": -2.1307973861694336, "rewards_train/1-w": 3.1346731185913086, "rewards_train/2-2": 3.1806135177612305, "rewards_train/2-w": -1.4576677083969116, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.265470504760742, "rewards_train/margins_1": 4.933543682098389, "rewards_train/margins_2": 4.638281226158142, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -190.6554412841797, "logps_train/policy_1_l": -206.4573211669922, "logps_train/policy_1_w": -159.08216857910156, "logps_train/policy_2_2": -138.19769287109375, "logps_train/policy_2_w": -234.999267578125, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -182.0, "logps_train/ref_1_w": -194.0, "logps_train/ref_2_2": -170.0, "logps_train/ref_2_w": -221.0, "rewards_train/1-2": -0.4382004737854004, "rewards_train/1-l": -2.4393362998962402, "rewards_train/1-w": 3.4564316272735596, "rewards_train/2-2": 3.1755423545837402, "rewards_train/2-w": -1.4243407249450684, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.8957679271698, "rewards_train/margins_1": 3.89463210105896, "rewards_train/margins_2": 4.599883079528809, "step": 644 }, { "epoch": 1.93, "logps_train/policy_1_2": -183.15896606445312, "logps_train/policy_1_l": -136.89395141601562, "logps_train/policy_1_w": -74.99685668945312, "logps_train/policy_2_2": -106.43878173828125, "logps_train/policy_2_w": -133.71121215820312, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -96.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -115.0, "rewards_train/1-2": -2.301051616668701, "rewards_train/1-l": -1.943887710571289, "rewards_train/1-w": 2.104318618774414, "rewards_train/2-2": 2.78932523727417, "rewards_train/2-w": -1.8465113639831543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.048206329345703, "rewards_train/margins_1": 4.405370235443115, "rewards_train/margins_2": 4.635836601257324, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -167.5538330078125, "logps_train/policy_1_l": -202.72283935546875, "logps_train/policy_1_w": -141.90570068359375, "logps_train/policy_2_2": -112.23027038574219, "logps_train/policy_2_w": -220.98153686523438, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -0.7928816676139832, "rewards_train/1-l": -1.831953763961792, "rewards_train/1-w": 3.559039354324341, "rewards_train/2-2": 2.6718943119049072, "rewards_train/2-w": -1.4028407335281372, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.390993118286133, "rewards_train/margins_1": 4.351921021938324, "rewards_train/margins_2": 4.074735045433044, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -178.7287139892578, "logps_train/policy_1_l": -186.25157165527344, "logps_train/policy_1_w": -150.36984252929688, "logps_train/policy_2_2": -114.55596160888672, "logps_train/policy_2_w": -234.21568298339844, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -215.0, "rewards_train/1-2": -1.4131050109863281, "rewards_train/1-l": -3.1364850997924805, "rewards_train/1-w": 3.329030990600586, "rewards_train/2-2": 2.68937349319458, "rewards_train/2-w": -1.9000835418701172, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.465516090393066, "rewards_train/margins_1": 4.742136001586914, "rewards_train/margins_2": 4.589457035064697, "step": 645 }, { "epoch": 1.93, "logps_train/policy_1_2": -186.56654357910156, "logps_train/policy_1_l": -148.91311645507812, "logps_train/policy_1_w": -119.47750854492188, "logps_train/policy_2_2": -124.49705505371094, "logps_train/policy_2_w": -181.9495849609375, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -123.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -169.0, "rewards_train/1-2": -1.1461069583892822, "rewards_train/1-l": -2.5940957069396973, "rewards_train/1-w": 2.9423370361328125, "rewards_train/2-2": 3.0256848335266113, "rewards_train/2-w": -1.326526403427124, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.53643274307251, "rewards_train/margins_1": 4.088443994522095, "rewards_train/margins_2": 4.352211236953735, "step": 645 }, { "epoch": 1.93, "learning_rate": 1.7547259966207708e-08, "loss": 0.5174, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -186.11929321289062, "logps_train/policy_1_l": -175.6763916015625, "logps_train/policy_1_w": -100.299560546875, "logps_train/policy_2_2": -109.01712036132812, "logps_train/policy_2_w": -159.2220916748047, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -126.5, "logps_train/ref_2_2": -139.0, "logps_train/ref_2_w": -147.0, "rewards_train/1-2": -1.713882327079773, "rewards_train/1-l": -2.751819372177124, "rewards_train/1-w": 2.6286864280700684, "rewards_train/2-2": 2.9851176738739014, "rewards_train/2-w": -1.2104911804199219, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.380505800247192, "rewards_train/margins_1": 4.342568755149841, "rewards_train/margins_2": 4.195608854293823, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -186.19241333007812, "logps_train/policy_1_l": -168.57139587402344, "logps_train/policy_1_w": -134.28738403320312, "logps_train/policy_2_2": -129.8314208984375, "logps_train/policy_2_w": -202.73812866210938, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -168.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.0297880172729492, "rewards_train/1-l": -2.10616397857666, "rewards_train/1-w": 3.299386978149414, "rewards_train/2-2": 2.766857624053955, "rewards_train/2-w": -1.693343162536621, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.405550956726074, "rewards_train/margins_1": 4.329174995422363, "rewards_train/margins_2": 4.460200786590576, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -228.2977752685547, "logps_train/policy_1_l": -174.68222045898438, "logps_train/policy_1_w": -120.20008850097656, "logps_train/policy_2_2": -153.83908081054688, "logps_train/policy_2_w": -180.65780639648438, "logps_train/ref_1_2": -210.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -150.0, "logps_train/ref_2_2": -187.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -1.9094651937484741, "rewards_train/1-l": -1.917245626449585, "rewards_train/1-w": 2.953038215637207, "rewards_train/2-2": 3.3008575439453125, "rewards_train/2-w": -1.0763287544250488, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.870283842086792, "rewards_train/margins_1": 4.862503409385681, "rewards_train/margins_2": 4.377186298370361, "step": 646 }, { "epoch": 1.93, "logps_train/policy_1_2": -223.11526489257812, "logps_train/policy_1_l": -188.8481903076172, "logps_train/policy_1_w": -119.47007751464844, "logps_train/policy_2_2": -137.80665588378906, "logps_train/policy_2_w": -199.89556884765625, "logps_train/ref_1_2": -201.0, "logps_train/ref_1_l": -166.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -172.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -2.2443392276763916, "rewards_train/1-l": -2.3006398677825928, "rewards_train/1-w": 3.2982072830200195, "rewards_train/2-2": 3.450974941253662, "rewards_train/2-w": -1.8152399063110352, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.598847150802612, "rewards_train/margins_1": 5.542546510696411, "rewards_train/margins_2": 5.266214847564697, "step": 646 }, { "epoch": 1.94, "logps_train/policy_1_2": -176.5614471435547, "logps_train/policy_1_l": -180.50921630859375, "logps_train/policy_1_w": -149.7985076904297, "logps_train/policy_2_2": -106.56483459472656, "logps_train/policy_2_w": -247.18484497070312, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -219.0, "rewards_train/1-2": -1.6264572143554688, "rewards_train/1-l": -2.0257270336151123, "rewards_train/1-w": 3.6732735633850098, "rewards_train/2-2": 2.626085042953491, "rewards_train/2-w": -2.7630159854888916, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.699000597000122, "rewards_train/margins_1": 5.2997307777404785, "rewards_train/margins_2": 5.389101028442383, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -114.39543151855469, "logps_train/policy_1_l": -131.29638671875, "logps_train/policy_1_w": -150.57981872558594, "logps_train/policy_2_2": -67.34768676757812, "logps_train/policy_2_w": -237.89474487304688, "logps_train/ref_1_2": -102.0, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -184.0, "logps_train/ref_2_2": -84.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -1.219621181488037, "rewards_train/1-l": -2.0027832984924316, "rewards_train/1-w": 3.355299472808838, "rewards_train/2-2": 1.652730941772461, "rewards_train/2-w": -2.514765739440918, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.3580827713012695, "rewards_train/margins_1": 4.574920654296875, "rewards_train/margins_2": 4.167496681213379, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -134.39804077148438, "logps_train/policy_1_l": -141.57699584960938, "logps_train/policy_1_w": -74.64097595214844, "logps_train/policy_2_2": -74.07282257080078, "logps_train/policy_2_w": -133.7533721923828, "logps_train/ref_1_2": -112.5, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -95.0, "logps_train/ref_2_w": -116.5, "rewards_train/1-2": -2.2033236026763916, "rewards_train/1-l": -2.2645792961120605, "rewards_train/1-w": 2.2678914070129395, "rewards_train/2-2": 2.0789787769317627, "rewards_train/2-w": -1.7152488231658936, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.532470703125, "rewards_train/margins_1": 4.471215009689331, "rewards_train/margins_2": 3.7942276000976562, "step": 647 }, { "epoch": 1.94, "logps_train/policy_1_2": -150.02490234375, "logps_train/policy_1_l": -126.69125366210938, "logps_train/policy_1_w": -72.06485748291016, "logps_train/policy_2_2": -83.35295104980469, "logps_train/policy_2_w": -132.87762451171875, "logps_train/ref_1_2": -133.0, "logps_train/ref_1_l": -108.0, "logps_train/ref_1_w": -95.5, "logps_train/ref_2_2": -109.5, "logps_train/ref_2_w": -119.5, "rewards_train/1-2": -1.6622543334960938, "rewards_train/1-l": -1.8425627946853638, "rewards_train/1-w": 2.3483967781066895, "rewards_train/2-2": 2.5951735973358154, "rewards_train/2-w": -1.3447933197021484, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.190959572792053, "rewards_train/margins_1": 4.010651111602783, "rewards_train/margins_2": 3.939966917037964, "step": 647 }, { "epoch": 1.94, "learning_rate": 1.4747331653923725e-08, "loss": 0.5259, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -105.46733856201172, "logps_train/policy_1_l": -137.21664428710938, "logps_train/policy_1_w": -78.14047241210938, "logps_train/policy_2_2": -60.979583740234375, "logps_train/policy_2_w": -126.84852600097656, "logps_train/ref_1_2": -93.5, "logps_train/ref_1_l": -111.0, "logps_train/ref_1_w": -98.5, "logps_train/ref_2_2": -78.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": -1.2154841423034668, "rewards_train/1-l": -2.6363613605499268, "rewards_train/1-w": 2.039957046508789, "rewards_train/2-2": 1.7580965757369995, "rewards_train/2-w": -1.303016185760498, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.676318407058716, "rewards_train/margins_1": 3.255441188812256, "rewards_train/margins_2": 3.0611127614974976, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -131.98065185546875, "logps_train/policy_1_l": -112.97000885009766, "logps_train/policy_1_w": -76.50981140136719, "logps_train/policy_2_2": -78.03260040283203, "logps_train/policy_2_w": -118.34542846679688, "logps_train/ref_1_2": -117.0, "logps_train/ref_1_l": -94.0, "logps_train/ref_1_w": -97.5, "logps_train/ref_2_2": -99.5, "logps_train/ref_2_w": -111.0, "rewards_train/1-2": -1.47072172164917, "rewards_train/1-l": -1.936844825744629, "rewards_train/1-w": 2.0998005867004395, "rewards_train/2-2": 2.185020923614502, "rewards_train/2-w": -0.7267303466796875, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.036645412445068, "rewards_train/margins_1": 3.5705223083496094, "rewards_train/margins_2": 2.9117512702941895, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -184.65383911132812, "logps_train/policy_1_l": -165.71188354492188, "logps_train/policy_1_w": -84.0541000366211, "logps_train/policy_2_2": -117.7982406616211, "logps_train/policy_2_w": -123.07169342041016, "logps_train/ref_1_2": -163.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -119.0, "rewards_train/1-2": -2.1921427249908447, "rewards_train/1-l": -1.9802207946777344, "rewards_train/1-w": 1.9367778301239014, "rewards_train/2-2": 2.4389262199401855, "rewards_train/2-w": -0.41381001472473145, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.8125, "rewards_train/margins": 3.9169986248016357, "rewards_train/margins_1": 4.128920555114746, "rewards_train/margins_2": 2.852736234664917, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -176.108154296875, "logps_train/policy_1_l": -143.5790557861328, "logps_train/policy_1_w": -139.62588500976562, "logps_train/policy_2_2": -112.20809936523438, "logps_train/policy_2_w": -228.8516387939453, "logps_train/ref_1_2": -162.0, "logps_train/ref_1_l": -117.5, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -210.0, "rewards_train/1-2": -1.4393316507339478, "rewards_train/1-l": -2.5922813415527344, "rewards_train/1-w": 3.9178807735443115, "rewards_train/2-2": 2.94950270652771, "rewards_train/2-w": -1.9164142608642578, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.510162115097046, "rewards_train/margins_1": 5.357212424278259, "rewards_train/margins_2": 4.865916967391968, "step": 648 }, { "epoch": 1.94, "logps_train/policy_1_2": -80.69328308105469, "logps_train/policy_1_l": -60.966835021972656, "logps_train/policy_1_w": -65.24893951416016, "logps_train/policy_2_2": -43.70426940917969, "logps_train/policy_2_w": -120.50337982177734, "logps_train/ref_1_2": -72.0, "logps_train/ref_1_l": -48.75, "logps_train/ref_1_w": -87.5, "logps_train/ref_2_2": -59.25, "logps_train/ref_2_w": -105.5, "rewards_train/1-2": -0.8861252665519714, "rewards_train/1-l": -1.2364296913146973, "rewards_train/1-w": 2.2219815254211426, "rewards_train/2-2": 1.553206205368042, "rewards_train/2-w": -1.5034626722335815, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.45841121673584, "rewards_train/margins_1": 3.108106791973114, "rewards_train/margins_2": 3.0566688776016235, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -206.48159790039062, "logps_train/policy_1_l": -158.0762176513672, "logps_train/policy_1_w": -116.99897766113281, "logps_train/policy_2_2": -126.91778564453125, "logps_train/policy_2_w": -195.4987030029297, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -148.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -177.0, "rewards_train/1-2": -2.0063629150390625, "rewards_train/1-l": -1.812504529953003, "rewards_train/1-w": 3.0969767570495605, "rewards_train/2-2": 3.4898624420166016, "rewards_train/2-w": -1.8967454433441162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.9094812870025635, "rewards_train/margins_1": 5.103339672088623, "rewards_train/margins_2": 5.386607885360718, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -234.76048278808594, "logps_train/policy_1_l": -182.75506591796875, "logps_train/policy_1_w": -76.7974853515625, "logps_train/policy_2_2": -156.7506103515625, "logps_train/policy_2_w": -121.26100158691406, "logps_train/ref_1_2": -224.0, "logps_train/ref_1_l": -157.0, "logps_train/ref_1_w": -96.5, "logps_train/ref_2_2": -197.0, "logps_train/ref_2_w": -113.5, "rewards_train/1-2": -1.0822980403900146, "rewards_train/1-l": -2.5795094966888428, "rewards_train/1-w": 1.9469125270843506, "rewards_train/2-2": 4.080992698669434, "rewards_train/2-w": -0.7921152114868164, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.526422023773193, "rewards_train/margins_1": 3.0292105674743652, "rewards_train/margins_2": 4.87310791015625, "step": 649 }, { "epoch": 1.94, "logps_train/policy_1_2": -156.25601196289062, "logps_train/policy_1_l": -176.96185302734375, "logps_train/policy_1_w": -138.65774536132812, "logps_train/policy_2_2": -103.43133544921875, "logps_train/policy_2_w": -210.75624084472656, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -155.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -0.6033350229263306, "rewards_train/1-l": -2.218841791152954, "rewards_train/1-w": 3.4645001888275146, "rewards_train/2-2": 2.6295223236083984, "rewards_train/2-w": -1.1304092407226562, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.683341979980469, "rewards_train/margins_1": 4.067835211753845, "rewards_train/margins_2": 3.7599315643310547, "step": 649 }, { "epoch": 1.95, "learning_rate": 1.2189959620839686e-08, "loss": 0.7071, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -269.05450439453125, "logps_train/policy_1_l": -252.62887573242188, "logps_train/policy_1_w": -183.9564208984375, "logps_train/policy_2_2": -174.74703979492188, "logps_train/policy_2_w": -285.3765563964844, "logps_train/ref_1_2": -247.0, "logps_train/ref_1_l": -222.0, "logps_train/ref_1_w": -225.0, "logps_train/ref_2_2": -214.0, "logps_train/ref_2_w": -260.0, "rewards_train/1-2": -2.229668617248535, "rewards_train/1-l": -3.0994107723236084, "rewards_train/1-w": 4.062170028686523, "rewards_train/2-2": 3.9077165126800537, "rewards_train/2-w": -2.5673422813415527, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 7.161580801010132, "rewards_train/margins_1": 6.291838645935059, "rewards_train/margins_2": 6.4750587940216064, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -190.36215209960938, "logps_train/policy_1_l": -165.19403076171875, "logps_train/policy_1_w": -137.1510467529297, "logps_train/policy_2_2": -114.61737060546875, "logps_train/policy_2_w": -226.3367462158203, "logps_train/ref_1_2": -167.0, "logps_train/ref_1_l": -144.0, "logps_train/ref_1_w": -169.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -194.0, "rewards_train/1-2": -2.388448715209961, "rewards_train/1-l": -2.1628599166870117, "rewards_train/1-w": 3.1679036617279053, "rewards_train/2-2": 2.5901331901550293, "rewards_train/2-w": -3.240706443786621, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.330763578414917, "rewards_train/margins_1": 5.556352376937866, "rewards_train/margins_2": 5.83083963394165, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -228.0786590576172, "logps_train/policy_1_l": -237.30963134765625, "logps_train/policy_1_w": -140.74435424804688, "logps_train/policy_2_2": -149.2498321533203, "logps_train/policy_2_w": -223.7896728515625, "logps_train/ref_1_2": -213.0, "logps_train/ref_1_l": -206.0, "logps_train/ref_1_w": -177.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -1.514115571975708, "rewards_train/1-l": -3.1387765407562256, "rewards_train/1-w": 3.5896263122558594, "rewards_train/2-2": 3.499236583709717, "rewards_train/2-w": -1.1859993934631348, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.728402853012085, "rewards_train/margins_1": 5.103741884231567, "rewards_train/margins_2": 4.685235977172852, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -141.93446350097656, "logps_train/policy_1_l": -155.0591583251953, "logps_train/policy_1_w": -85.45524597167969, "logps_train/policy_2_2": -94.73919677734375, "logps_train/policy_2_w": -140.16993713378906, "logps_train/ref_1_2": -137.0, "logps_train/ref_1_l": -135.0, "logps_train/ref_1_w": -114.5, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -131.0, "rewards_train/1-2": -0.5328991413116455, "rewards_train/1-l": -1.952204942703247, "rewards_train/1-w": 2.9062328338623047, "rewards_train/2-2": 2.4885804653167725, "rewards_train/2-w": -0.9193374514579773, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.858437776565552, "rewards_train/margins_1": 3.43913197517395, "rewards_train/margins_2": 3.4079179167747498, "step": 650 }, { "epoch": 1.95, "logps_train/policy_1_2": -147.87132263183594, "logps_train/policy_1_l": -123.79977416992188, "logps_train/policy_1_w": -88.84053802490234, "logps_train/policy_2_2": -91.22012329101562, "logps_train/policy_2_w": -135.29830932617188, "logps_train/ref_1_2": -134.0, "logps_train/ref_1_l": -104.0, "logps_train/ref_1_w": -110.5, "logps_train/ref_2_2": -116.0, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -1.4488508701324463, "rewards_train/1-l": -1.964449167251587, "rewards_train/1-w": 2.185868263244629, "rewards_train/2-2": 2.4988861083984375, "rewards_train/2-w": -1.0425268411636353, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.150317430496216, "rewards_train/margins_1": 3.634719133377075, "rewards_train/margins_2": 3.5414129495620728, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -207.82464599609375, "logps_train/policy_1_l": -222.64962768554688, "logps_train/policy_1_w": -151.0751953125, "logps_train/policy_2_2": -127.64215087890625, "logps_train/policy_2_w": -241.78204345703125, "logps_train/ref_1_2": -192.0, "logps_train/ref_1_l": -204.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.5902777910232544, "rewards_train/1-l": -1.8851770162582397, "rewards_train/1-w": 3.7959961891174316, "rewards_train/2-2": 3.183441638946533, "rewards_train/2-w": -1.5688292980194092, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.681173205375671, "rewards_train/margins_1": 5.386273980140686, "rewards_train/margins_2": 4.752270936965942, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -177.91458129882812, "logps_train/policy_1_l": -166.54251098632812, "logps_train/policy_1_w": -104.21583557128906, "logps_train/policy_2_2": -117.63484954833984, "logps_train/policy_2_w": -156.32005310058594, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -127.5, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.8016133308410645, "rewards_train/1-l": -2.726515054702759, "rewards_train/1-w": 2.3541970252990723, "rewards_train/2-2": 2.177433490753174, "rewards_train/2-w": -1.202317714691162, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.080712080001831, "rewards_train/margins_1": 4.155810356140137, "rewards_train/margins_2": 3.379751205444336, "step": 651 }, { "epoch": 1.95, "logps_train/policy_1_2": -185.46649169921875, "logps_train/policy_1_l": -163.16021728515625, "logps_train/policy_1_w": -138.01101684570312, "logps_train/policy_2_2": -129.49569702148438, "logps_train/policy_2_w": -224.791748046875, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -139.0, "logps_train/ref_1_w": -173.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -1.0857114791870117, "rewards_train/1-l": -2.3736376762390137, "rewards_train/1-w": 3.486008644104004, "rewards_train/2-2": 2.542227029800415, "rewards_train/2-w": -2.1740970611572266, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.859646320343018, "rewards_train/margins_1": 4.571720123291016, "rewards_train/margins_2": 4.716324090957642, "step": 651 }, { "epoch": 1.95, "learning_rate": 9.87539346195776e-09, "loss": 0.4371, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -154.264404296875, "logps_train/policy_1_l": -128.38967895507812, "logps_train/policy_1_w": -79.91938781738281, "logps_train/policy_2_2": -97.63665771484375, "logps_train/policy_2_w": -123.69718170166016, "logps_train/ref_1_2": -144.0, "logps_train/ref_1_l": -104.5, "logps_train/ref_1_w": -103.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -114.5, "rewards_train/1-2": -1.0112061500549316, "rewards_train/1-l": -2.418461322784424, "rewards_train/1-w": 2.3496623039245605, "rewards_train/2-2": 2.5224666595458984, "rewards_train/2-w": -0.9144200086593628, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.768123626708984, "rewards_train/margins_1": 3.360868453979492, "rewards_train/margins_2": 3.4368866682052612, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -231.90765380859375, "logps_train/policy_1_l": -207.56002807617188, "logps_train/policy_1_w": -144.27774047851562, "logps_train/policy_2_2": -165.47683715820312, "logps_train/policy_2_w": -217.80615234375, "logps_train/ref_1_2": -220.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -180.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -205.0, "rewards_train/1-2": -1.2345154285430908, "rewards_train/1-l": -2.159907817840576, "rewards_train/1-w": 3.5734968185424805, "rewards_train/2-2": 3.269505023956299, "rewards_train/2-w": -1.3079588413238525, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.733404636383057, "rewards_train/margins_1": 4.808012247085571, "rewards_train/margins_2": 4.577463865280151, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -167.0195770263672, "logps_train/policy_1_l": -128.70001220703125, "logps_train/policy_1_w": -105.91691589355469, "logps_train/policy_2_2": -111.6172866821289, "logps_train/policy_2_w": -152.33067321777344, "logps_train/ref_1_2": -160.0, "logps_train/ref_1_l": -106.0, "logps_train/ref_1_w": -132.0, "logps_train/ref_2_2": -143.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -0.7714887261390686, "rewards_train/1-l": -2.2965636253356934, "rewards_train/1-w": 2.649177074432373, "rewards_train/2-2": 3.1085848808288574, "rewards_train/2-w": -0.48423898220062256, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.945740699768066, "rewards_train/margins_1": 3.4206658005714417, "rewards_train/margins_2": 3.59282386302948, "step": 652 }, { "epoch": 1.95, "logps_train/policy_1_2": -189.4738006591797, "logps_train/policy_1_l": -236.15200805664062, "logps_train/policy_1_w": -157.6190948486328, "logps_train/policy_2_2": -136.73439025878906, "logps_train/policy_2_w": -232.1602783203125, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -203.0, "logps_train/ref_1_w": -200.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -227.0, "rewards_train/1-2": -0.6477705836296082, "rewards_train/1-l": -3.3116848468780518, "rewards_train/1-w": 4.20313024520874, "rewards_train/2-2": 2.861668109893799, "rewards_train/2-w": -0.5001099705696106, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 7.514815092086792, "rewards_train/margins_1": 4.850900828838348, "rewards_train/margins_2": 3.3617780804634094, "step": 652 }, { "epoch": 1.96, "logps_train/policy_1_2": -195.1488037109375, "logps_train/policy_1_l": -150.64398193359375, "logps_train/policy_1_w": -101.30374145507812, "logps_train/policy_2_2": -120.6136245727539, "logps_train/policy_2_w": -191.32069396972656, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -129.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -2.2836313247680664, "rewards_train/1-l": -2.159369707107544, "rewards_train/1-w": 3.1711881160736084, "rewards_train/2-2": 2.6681299209594727, "rewards_train/2-w": -2.675039291381836, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.330557823181152, "rewards_train/margins_1": 5.454819440841675, "rewards_train/margins_2": 5.343169212341309, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -164.5848846435547, "logps_train/policy_1_l": -152.47361755371094, "logps_train/policy_1_w": -111.37178802490234, "logps_train/policy_2_2": -108.58051300048828, "logps_train/policy_2_w": -172.4100341796875, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -1.1350501775741577, "rewards_train/1-l": -1.8534351587295532, "rewards_train/1-w": 2.598172664642334, "rewards_train/2-2": 2.5372612476348877, "rewards_train/2-w": -1.2626824378967285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 4.451607823371887, "rewards_train/margins_1": 3.7332228422164917, "rewards_train/margins_2": 3.799943685531616, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -231.81126403808594, "logps_train/policy_1_l": -191.86209106445312, "logps_train/policy_1_w": -142.01193237304688, "logps_train/policy_2_2": -157.95816040039062, "logps_train/policy_2_w": -201.14260864257812, "logps_train/ref_1_2": -222.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -179.0, "logps_train/ref_2_2": -198.0, "logps_train/ref_2_w": -199.0, "rewards_train/1-2": -1.0033912658691406, "rewards_train/1-l": -2.8385534286499023, "rewards_train/1-w": 3.661306142807007, "rewards_train/2-2": 3.9983248710632324, "rewards_train/2-w": -0.1906268298625946, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.499859571456909, "rewards_train/margins_1": 4.6646974086761475, "rewards_train/margins_2": 4.188951700925827, "step": 653 }, { "epoch": 1.96, "logps_train/policy_1_2": -195.1594696044922, "logps_train/policy_1_l": -229.62457275390625, "logps_train/policy_1_w": -129.98526000976562, "logps_train/policy_2_2": -117.54888916015625, "logps_train/policy_2_w": -215.9095916748047, "logps_train/ref_1_2": -179.0, "logps_train/ref_1_l": -208.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -198.0, "rewards_train/1-2": -1.6179001331329346, "rewards_train/1-l": -2.196685552597046, "rewards_train/1-w": 3.5842864513397217, "rewards_train/2-2": 2.9384710788726807, "rewards_train/2-w": -1.8128339052200317, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.780972003936768, "rewards_train/margins_1": 5.202186584472656, "rewards_train/margins_2": 4.751304984092712, "step": 653 }, { "epoch": 1.96, "learning_rate": 7.803859074854425e-09, "loss": 0.5404, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -236.23223876953125, "logps_train/policy_1_l": -216.6819610595703, "logps_train/policy_1_w": -207.5401153564453, "logps_train/policy_2_2": -162.59471130371094, "logps_train/policy_2_w": -293.4752197265625, "logps_train/ref_1_2": -229.0, "logps_train/ref_1_l": -194.0, "logps_train/ref_1_w": -250.0, "logps_train/ref_2_2": -199.0, "logps_train/ref_2_w": -278.0, "rewards_train/1-2": -0.6904104351997375, "rewards_train/1-l": -2.2703447341918945, "rewards_train/1-w": 4.2532148361206055, "rewards_train/2-2": 3.6276376247406006, "rewards_train/2-w": -1.5764291286468506, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.5235595703125, "rewards_train/margins_1": 4.943625271320343, "rewards_train/margins_2": 5.204066753387451, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -163.4420928955078, "logps_train/policy_1_l": -207.38140869140625, "logps_train/policy_1_w": -99.56378173828125, "logps_train/policy_2_2": -109.16834259033203, "logps_train/policy_2_w": -158.85693359375, "logps_train/ref_1_2": -153.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -125.0, "logps_train/ref_2_2": -136.0, "logps_train/ref_2_w": -149.0, "rewards_train/1-2": -1.0039758682250977, "rewards_train/1-l": -2.5760326385498047, "rewards_train/1-w": 2.58073091506958, "rewards_train/2-2": 2.6161742210388184, "rewards_train/2-w": -1.0251460075378418, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.156763553619385, "rewards_train/margins_1": 3.5847067832946777, "rewards_train/margins_2": 3.64132022857666, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -212.12118530273438, "logps_train/policy_1_l": -212.76498413085938, "logps_train/policy_1_w": -115.82312774658203, "logps_train/policy_2_2": -132.5570526123047, "logps_train/policy_2_w": -183.32875061035156, "logps_train/ref_1_2": -191.0, "logps_train/ref_1_l": -181.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -165.0, "logps_train/ref_2_w": -168.0, "rewards_train/1-2": -2.1269631385803223, "rewards_train/1-l": -3.1862635612487793, "rewards_train/1-w": 2.8633904457092285, "rewards_train/2-2": 3.205233097076416, "rewards_train/2-w": -1.5059216022491455, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.049654006958008, "rewards_train/margins_1": 4.990353584289551, "rewards_train/margins_2": 4.7111546993255615, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -119.91590881347656, "logps_train/policy_1_l": -135.66099548339844, "logps_train/policy_1_w": -90.70904541015625, "logps_train/policy_2_2": -74.68325805664062, "logps_train/policy_2_w": -149.31028747558594, "logps_train/ref_1_2": -109.5, "logps_train/ref_1_l": -111.5, "logps_train/ref_1_w": -115.0, "logps_train/ref_2_2": -93.5, "logps_train/ref_2_w": -133.0, "rewards_train/1-2": -1.0369036197662354, "rewards_train/1-l": -2.4204211235046387, "rewards_train/1-w": 2.4183528423309326, "rewards_train/2-2": 1.913314700126648, "rewards_train/2-w": -1.6577863693237305, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.838773965835571, "rewards_train/margins_1": 3.455256462097168, "rewards_train/margins_2": 3.5711010694503784, "step": 654 }, { "epoch": 1.96, "logps_train/policy_1_2": -272.013671875, "logps_train/policy_1_l": -218.05596923828125, "logps_train/policy_1_w": -125.1883773803711, "logps_train/policy_2_2": -163.30087280273438, "logps_train/policy_2_w": -194.15673828125, "logps_train/ref_1_2": -242.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -207.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -3.0330090522766113, "rewards_train/1-l": -2.8689770698547363, "rewards_train/1-w": 3.2217869758605957, "rewards_train/2-2": 4.372550010681152, "rewards_train/2-w": -1.154344916343689, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.090764045715332, "rewards_train/margins_1": 6.254796028137207, "rewards_train/margins_2": 5.526894927024841, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -199.65426635742188, "logps_train/policy_1_l": -157.90667724609375, "logps_train/policy_1_w": -124.73347473144531, "logps_train/policy_2_2": -120.39617919921875, "logps_train/policy_2_w": -212.21731567382812, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -143.0, "logps_train/ref_1_w": -158.0, "logps_train/ref_2_2": -151.0, "logps_train/ref_2_w": -192.0, "rewards_train/1-2": -1.9136677980422974, "rewards_train/1-l": -1.4787530899047852, "rewards_train/1-w": 3.311028003692627, "rewards_train/2-2": 3.0207338333129883, "rewards_train/2-w": -2.0623555183410645, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.789781093597412, "rewards_train/margins_1": 5.224695801734924, "rewards_train/margins_2": 5.083089351654053, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -227.80714416503906, "logps_train/policy_1_l": -176.82318115234375, "logps_train/policy_1_w": -120.5469970703125, "logps_train/policy_2_2": -145.31573486328125, "logps_train/policy_2_w": -185.21206665039062, "logps_train/ref_1_2": -209.0, "logps_train/ref_1_l": -158.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -177.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -1.8570818901062012, "rewards_train/1-l": -1.91825532913208, "rewards_train/1-w": 3.01444149017334, "rewards_train/2-2": 3.165398359298706, "rewards_train/2-w": -1.1385899782180786, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.93269681930542, "rewards_train/margins_1": 4.871523380279541, "rewards_train/margins_2": 4.303988337516785, "step": 655 }, { "epoch": 1.96, "logps_train/policy_1_2": -187.640380859375, "logps_train/policy_1_l": -138.34027099609375, "logps_train/policy_1_w": -105.73775482177734, "logps_train/policy_2_2": -119.41595458984375, "logps_train/policy_2_w": -175.20205688476562, "logps_train/ref_1_2": -174.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -152.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.3968510627746582, "rewards_train/1-l": -1.92055082321167, "rewards_train/1-w": 3.162747621536255, "rewards_train/2-2": 3.233307361602783, "rewards_train/2-w": -1.4129797220230103, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.083298444747925, "rewards_train/margins_1": 4.559598684310913, "rewards_train/margins_2": 4.6462870836257935, "step": 655 }, { "epoch": 1.96, "learning_rate": 5.975558637634216e-09, "loss": 0.4487, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -186.267822265625, "logps_train/policy_1_l": -152.92657470703125, "logps_train/policy_1_w": -104.50222778320312, "logps_train/policy_2_2": -117.52772521972656, "logps_train/policy_2_w": -170.11402893066406, "logps_train/ref_1_2": -169.0, "logps_train/ref_1_l": -127.5, "logps_train/ref_1_w": -134.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -1.7638931274414062, "rewards_train/1-l": -2.5354323387145996, "rewards_train/1-w": 2.9191126823425293, "rewards_train/2-2": 2.847227096557617, "rewards_train/2-w": -1.291481614112854, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.454545021057129, "rewards_train/margins_1": 4.6830058097839355, "rewards_train/margins_2": 4.138708710670471, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -99.52873229980469, "logps_train/policy_1_l": -88.83071899414062, "logps_train/policy_1_w": -61.50988006591797, "logps_train/policy_2_2": -51.01097869873047, "logps_train/policy_2_w": -117.17747497558594, "logps_train/ref_1_2": -85.0, "logps_train/ref_1_l": -71.0, "logps_train/ref_1_w": -83.5, "logps_train/ref_2_2": -69.0, "logps_train/ref_2_w": -102.0, "rewards_train/1-2": -1.4427170753479004, "rewards_train/1-l": -1.7802399396896362, "rewards_train/1-w": 2.195105791091919, "rewards_train/2-2": 1.7660897970199585, "rewards_train/2-w": -1.4923573732376099, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 3.975345730781555, "rewards_train/margins_1": 3.6378228664398193, "rewards_train/margins_2": 3.2584471702575684, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -212.41632080078125, "logps_train/policy_1_l": -195.26132202148438, "logps_train/policy_1_w": -94.79259490966797, "logps_train/policy_2_2": -147.2790069580078, "logps_train/policy_2_w": -151.50360107421875, "logps_train/ref_1_2": -199.0, "logps_train/ref_1_l": -170.0, "logps_train/ref_1_w": -119.0, "logps_train/ref_2_2": -179.0, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.3701475858688354, "rewards_train/1-l": -2.5607025623321533, "rewards_train/1-w": 2.445349931716919, "rewards_train/2-2": 3.1880178451538086, "rewards_train/2-w": -1.2030953168869019, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.006052494049072, "rewards_train/margins_1": 3.8154975175857544, "rewards_train/margins_2": 4.3911131620407104, "step": 656 }, { "epoch": 1.96, "logps_train/policy_1_2": -178.68743896484375, "logps_train/policy_1_l": -168.3417510986328, "logps_train/policy_1_w": -121.18901062011719, "logps_train/policy_2_2": -107.8758316040039, "logps_train/policy_2_w": -202.86024475097656, "logps_train/ref_1_2": -164.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -151.0, "logps_train/ref_2_2": -135.0, "logps_train/ref_2_w": -183.0, "rewards_train/1-2": -1.4800732135772705, "rewards_train/1-l": -2.3206369876861572, "rewards_train/1-w": 3.0420122146606445, "rewards_train/2-2": 2.713979482650757, "rewards_train/2-w": -1.982703685760498, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.362649202346802, "rewards_train/margins_1": 4.522085428237915, "rewards_train/margins_2": 4.696683168411255, "step": 656 }, { "epoch": 1.97, "logps_train/policy_1_2": -186.33506774902344, "logps_train/policy_1_l": -192.46441650390625, "logps_train/policy_1_w": -118.23590850830078, "logps_train/policy_2_2": -138.10379028320312, "logps_train/policy_2_w": -173.0489959716797, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -168.0, "logps_train/ref_1_w": -149.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -170.0, "rewards_train/1-2": -0.1163189560174942, "rewards_train/1-l": -2.416755199432373, "rewards_train/1-w": 3.0894951820373535, "rewards_train/2-2": 3.0068087577819824, "rewards_train/2-w": -0.2556803524494171, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.506250381469727, "rewards_train/margins_1": 3.2058141380548477, "rewards_train/margins_2": 3.2624891102313995, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -246.81341552734375, "logps_train/policy_1_l": -224.84939575195312, "logps_train/policy_1_w": -161.28082275390625, "logps_train/policy_2_2": -167.15769958496094, "logps_train/policy_2_w": -241.83714294433594, "logps_train/ref_1_2": -227.0, "logps_train/ref_1_l": -198.0, "logps_train/ref_1_w": -195.0, "logps_train/ref_2_2": -202.0, "logps_train/ref_2_w": -226.0, "rewards_train/1-2": -1.9543880224227905, "rewards_train/1-l": -2.638552188873291, "rewards_train/1-w": 3.324944257736206, "rewards_train/2-2": 3.508059024810791, "rewards_train/2-w": -1.6198465824127197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.963496446609497, "rewards_train/margins_1": 5.279332280158997, "rewards_train/margins_2": 5.127905607223511, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -232.07235717773438, "logps_train/policy_1_l": -196.468505859375, "logps_train/policy_1_w": -121.84370422363281, "logps_train/policy_2_2": -148.85525512695312, "logps_train/policy_2_w": -206.48001098632812, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -179.0, "logps_train/ref_1_w": -154.0, "logps_train/ref_2_2": -181.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -2.5400476455688477, "rewards_train/1-l": -1.722435712814331, "rewards_train/1-w": 3.1687541007995605, "rewards_train/2-2": 3.246114730834961, "rewards_train/2-w": -2.015188217163086, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.891189813613892, "rewards_train/margins_1": 5.708801746368408, "rewards_train/margins_2": 5.261302947998047, "step": 657 }, { "epoch": 1.97, "logps_train/policy_1_2": -189.88363647460938, "logps_train/policy_1_l": -172.60934448242188, "logps_train/policy_1_w": -134.07992553710938, "logps_train/policy_2_2": -116.80159759521484, "logps_train/policy_2_w": -209.80120849609375, "logps_train/ref_1_2": -168.0, "logps_train/ref_1_l": -149.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -146.0, "logps_train/ref_2_w": -188.0, "rewards_train/1-2": -2.2258622646331787, "rewards_train/1-l": -2.322935104370117, "rewards_train/1-w": 2.8435816764831543, "rewards_train/2-2": 2.918668508529663, "rewards_train/2-w": -2.202449083328247, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.1665167808532715, "rewards_train/margins_1": 5.069443941116333, "rewards_train/margins_2": 5.12111759185791, "step": 657 }, { "epoch": 1.97, "learning_rate": 4.390670589196622e-09, "loss": 0.4828, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -215.07907104492188, "logps_train/policy_1_l": -176.22195434570312, "logps_train/policy_1_w": -117.24702453613281, "logps_train/policy_2_2": -151.65707397460938, "logps_train/policy_2_w": -179.71607971191406, "logps_train/ref_1_2": -204.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.111228108406067, "rewards_train/1-l": -2.272977590560913, "rewards_train/1-w": 2.748735189437866, "rewards_train/2-2": 2.829897403717041, "rewards_train/2-w": -1.3923097848892212, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.021712779998779, "rewards_train/margins_1": 3.859963297843933, "rewards_train/margins_2": 4.222207188606262, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -170.09515380859375, "logps_train/policy_1_l": -179.83090209960938, "logps_train/policy_1_w": -149.3338623046875, "logps_train/policy_2_2": -107.05084228515625, "logps_train/policy_2_w": -221.97036743164062, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -156.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -134.0, "logps_train/ref_2_w": -204.0, "rewards_train/1-2": -0.9538506865501404, "rewards_train/1-l": -2.3188815116882324, "rewards_train/1-w": 2.871838092803955, "rewards_train/2-2": 2.6521432399749756, "rewards_train/2-w": -1.841275691986084, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.1907196044921875, "rewards_train/margins_1": 3.8256887793540955, "rewards_train/margins_2": 4.49341893196106, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -184.7021942138672, "logps_train/policy_1_l": -215.22433471679688, "logps_train/policy_1_w": -108.00770568847656, "logps_train/policy_2_2": -114.42830657958984, "logps_train/policy_2_w": -179.93075561523438, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -133.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -160.0, "rewards_train/1-2": -2.0045948028564453, "rewards_train/1-l": -2.6003637313842773, "rewards_train/1-w": 2.4621691703796387, "rewards_train/2-2": 2.5513100624084473, "rewards_train/2-w": -1.934189796447754, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.062532901763916, "rewards_train/margins_1": 4.466763973236084, "rewards_train/margins_2": 4.485499858856201, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -192.81626892089844, "logps_train/policy_1_l": -182.1763153076172, "logps_train/policy_1_w": -132.79953002929688, "logps_train/policy_2_2": -132.28048706054688, "logps_train/policy_2_w": -205.67782592773438, "logps_train/ref_1_2": -184.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -170.0, "logps_train/ref_2_2": -161.0, "logps_train/ref_2_w": -191.0, "rewards_train/1-2": -0.9391460418701172, "rewards_train/1-l": -2.0000295639038086, "rewards_train/1-w": 3.7228775024414062, "rewards_train/2-2": 2.868242025375366, "rewards_train/2-w": -1.4975690841674805, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.722907066345215, "rewards_train/margins_1": 4.662023544311523, "rewards_train/margins_2": 4.365811109542847, "step": 658 }, { "epoch": 1.97, "logps_train/policy_1_2": -175.94876098632812, "logps_train/policy_1_l": -170.4471893310547, "logps_train/policy_1_w": -120.89080810546875, "logps_train/policy_2_2": -105.17945861816406, "logps_train/policy_2_w": -196.36441040039062, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -147.0, "logps_train/ref_1_w": -153.0, "logps_train/ref_2_2": -133.0, "logps_train/ref_2_w": -178.0, "rewards_train/1-2": -1.9909696578979492, "rewards_train/1-l": -2.346867561340332, "rewards_train/1-w": 3.201153516769409, "rewards_train/2-2": 2.762131690979004, "rewards_train/2-w": -1.8501118421554565, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 5.548021078109741, "rewards_train/margins_1": 5.192123174667358, "rewards_train/margins_2": 4.6122435331344604, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -238.3968048095703, "logps_train/policy_1_l": -212.35369873046875, "logps_train/policy_1_w": -150.41802978515625, "logps_train/policy_2_2": -151.65457153320312, "logps_train/policy_2_w": -238.32264709472656, "logps_train/ref_1_2": -212.0, "logps_train/ref_1_l": -183.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -184.0, "logps_train/ref_2_w": -216.0, "rewards_train/1-2": -2.615241527557373, "rewards_train/1-l": -2.9601497650146484, "rewards_train/1-w": 3.2839784622192383, "rewards_train/2-2": 3.1746304035186768, "rewards_train/2-w": -2.234607696533203, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.244128227233887, "rewards_train/margins_1": 5.899219989776611, "rewards_train/margins_2": 5.40923810005188, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -233.64834594726562, "logps_train/policy_1_l": -164.74484252929688, "logps_train/policy_1_w": -149.64883422851562, "logps_train/policy_2_2": -148.60882568359375, "logps_train/policy_2_w": -229.02621459960938, "logps_train/ref_1_2": -217.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -186.0, "logps_train/ref_2_2": -190.0, "logps_train/ref_2_w": -213.0, "rewards_train/1-2": -1.6513583660125732, "rewards_train/1-l": -2.4638891220092773, "rewards_train/1-w": 3.636681079864502, "rewards_train/2-2": 4.101324081420898, "rewards_train/2-w": -1.5979349613189697, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.100570201873779, "rewards_train/margins_1": 5.288039445877075, "rewards_train/margins_2": 5.699259042739868, "step": 659 }, { "epoch": 1.97, "logps_train/policy_1_2": -197.94570922851562, "logps_train/policy_1_l": -186.0768585205078, "logps_train/policy_1_w": -126.37952423095703, "logps_train/policy_2_2": -124.32962799072266, "logps_train/policy_2_w": -207.65188598632812, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -163.0, "logps_train/ref_1_w": -163.0, "logps_train/ref_2_2": -158.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.274307370185852, "rewards_train/1-l": -2.285810947418213, "rewards_train/1-w": 3.63704776763916, "rewards_train/2-2": 3.310408592224121, "rewards_train/2-w": -2.2073769569396973, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.922858715057373, "rewards_train/margins_1": 4.911355137825012, "rewards_train/margins_2": 5.517785549163818, "step": 659 }, { "epoch": 1.98, "learning_rate": 3.049349611820851e-09, "loss": 0.562, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -161.73629760742188, "logps_train/policy_1_l": -148.61366271972656, "logps_train/policy_1_w": -85.62493896484375, "logps_train/policy_2_2": -101.05011749267578, "logps_train/policy_2_w": -137.06370544433594, "logps_train/ref_1_2": -148.0, "logps_train/ref_1_l": -128.0, "logps_train/ref_1_w": -107.5, "logps_train/ref_2_2": -125.5, "logps_train/ref_2_w": -125.0, "rewards_train/1-2": -1.4191367626190186, "rewards_train/1-l": -2.073133707046509, "rewards_train/1-w": 2.1729071140289307, "rewards_train/2-2": 2.4248223304748535, "rewards_train/2-w": -1.2004013061523438, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.2460408210754395, "rewards_train/margins_1": 3.592043876647949, "rewards_train/margins_2": 3.6252236366271973, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -147.88955688476562, "logps_train/policy_1_l": -154.65826416015625, "logps_train/policy_1_w": -88.55953979492188, "logps_train/policy_2_2": -91.23843383789062, "logps_train/policy_2_w": -138.7486572265625, "logps_train/ref_1_2": -130.0, "logps_train/ref_1_l": -131.0, "logps_train/ref_1_w": -110.0, "logps_train/ref_2_2": -113.5, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -1.8237204551696777, "rewards_train/1-l": -2.419243812561035, "rewards_train/1-w": 2.153616428375244, "rewards_train/2-2": 2.235532283782959, "rewards_train/2-w": -1.318616271018982, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.572860240936279, "rewards_train/margins_1": 3.977336883544922, "rewards_train/margins_2": 3.554148554801941, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -175.44732666015625, "logps_train/policy_1_l": -113.77386474609375, "logps_train/policy_1_w": -78.74327087402344, "logps_train/policy_2_2": -119.00029754638672, "logps_train/policy_2_w": -126.02610778808594, "logps_train/ref_1_2": -161.0, "logps_train/ref_1_l": -99.0, "logps_train/ref_1_w": -102.0, "logps_train/ref_2_2": -142.0, "logps_train/ref_2_w": -117.0, "rewards_train/1-2": -1.4464911222457886, "rewards_train/1-l": -1.4855893850326538, "rewards_train/1-w": 2.3214735984802246, "rewards_train/2-2": 2.323603630065918, "rewards_train/2-w": -0.9080796241760254, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 3.8070629835128784, "rewards_train/margins_1": 3.767964720726013, "rewards_train/margins_2": 3.2316832542419434, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -168.0440673828125, "logps_train/policy_1_l": -167.18991088867188, "logps_train/policy_1_w": -130.48292541503906, "logps_train/policy_2_2": -104.13139343261719, "logps_train/policy_2_w": -207.043212890625, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -145.0, "logps_train/ref_1_w": -162.0, "logps_train/ref_2_2": -130.0, "logps_train/ref_2_w": -184.0, "rewards_train/1-2": -1.255969524383545, "rewards_train/1-l": -2.2096166610717773, "rewards_train/1-w": 3.1829569339752197, "rewards_train/2-2": 2.583735704421997, "rewards_train/2-w": -2.3465096950531006, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.392573595046997, "rewards_train/margins_1": 4.438926458358765, "rewards_train/margins_2": 4.930245399475098, "step": 660 }, { "epoch": 1.98, "logps_train/policy_1_2": -203.17059326171875, "logps_train/policy_1_l": -172.72921752929688, "logps_train/policy_1_w": -113.96934509277344, "logps_train/policy_2_2": -124.42118072509766, "logps_train/policy_2_w": -199.2652130126953, "logps_train/ref_1_2": -178.0, "logps_train/ref_1_l": -152.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -2.4748713970184326, "rewards_train/1-l": -2.0599465370178223, "rewards_train/1-w": 2.709315299987793, "rewards_train/2-2": 2.8430380821228027, "rewards_train/2-w": -2.399177312850952, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.769261837005615, "rewards_train/margins_1": 5.184186697006226, "rewards_train/margins_2": 5.242215394973755, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -124.71265411376953, "logps_train/policy_1_l": -116.50070190429688, "logps_train/policy_1_w": -109.64491271972656, "logps_train/policy_2_2": -67.96316528320312, "logps_train/policy_2_w": -171.2962646484375, "logps_train/ref_1_2": -113.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -138.0, "logps_train/ref_2_2": -91.0, "logps_train/ref_2_w": -156.0, "rewards_train/1-2": -1.1720463037490845, "rewards_train/1-l": -1.8458224534988403, "rewards_train/1-w": 2.864415168762207, "rewards_train/2-2": 2.32165265083313, "rewards_train/2-w": -1.5186901092529297, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.710237622261047, "rewards_train/margins_1": 4.0364614725112915, "rewards_train/margins_2": 3.8403427600860596, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -212.24386596679688, "logps_train/policy_1_l": -151.64028930664062, "logps_train/policy_1_w": -118.76549530029297, "logps_train/policy_2_2": -137.14511108398438, "logps_train/policy_2_w": -197.4696044921875, "logps_train/ref_1_2": -193.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -152.0, "logps_train/ref_2_2": -168.0, "logps_train/ref_2_w": -180.0, "rewards_train/1-2": -1.909348726272583, "rewards_train/1-l": -1.847353458404541, "rewards_train/1-w": 3.309314727783203, "rewards_train/2-2": 3.0938875675201416, "rewards_train/2-w": -1.7934690713882446, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.156668186187744, "rewards_train/margins_1": 5.218663454055786, "rewards_train/margins_2": 4.887356638908386, "step": 661 }, { "epoch": 1.98, "logps_train/policy_1_2": -181.34335327148438, "logps_train/policy_1_l": -188.1213836669922, "logps_train/policy_1_w": -115.62313079833984, "logps_train/policy_2_2": -125.72923278808594, "logps_train/policy_2_w": -181.86676025390625, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -159.0, "logps_train/ref_1_w": -141.0, "logps_train/ref_2_2": -149.0, "logps_train/ref_2_w": -166.0, "rewards_train/1-2": -1.6062109470367432, "rewards_train/1-l": -2.9428024291992188, "rewards_train/1-w": 2.567765235900879, "rewards_train/2-2": 2.310671329498291, "rewards_train/2-w": -1.610503911972046, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.510567665100098, "rewards_train/margins_1": 4.173976182937622, "rewards_train/margins_2": 3.921175241470337, "step": 661 }, { "epoch": 1.98, "learning_rate": 1.951726616070404e-09, "loss": 0.5931, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -114.68272399902344, "logps_train/policy_1_l": -78.59174346923828, "logps_train/policy_1_w": -59.968345642089844, "logps_train/policy_2_2": -62.20367431640625, "logps_train/policy_2_w": -113.0084228515625, "logps_train/ref_1_2": -100.0, "logps_train/ref_1_l": -63.0, "logps_train/ref_1_w": -81.5, "logps_train/ref_2_2": -83.5, "logps_train/ref_2_w": -98.0, "rewards_train/1-2": -1.473350167274475, "rewards_train/1-l": -1.5578134059906006, "rewards_train/1-w": 2.1557960510253906, "rewards_train/2-2": 2.125091552734375, "rewards_train/2-w": -1.4867310523986816, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.713609457015991, "rewards_train/margins_1": 3.6291462182998657, "rewards_train/margins_2": 3.6118226051330566, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -190.9476776123047, "logps_train/policy_1_l": -129.65530395507812, "logps_train/policy_1_w": -111.38743591308594, "logps_train/policy_2_2": -114.67056274414062, "logps_train/policy_2_w": -177.20339965820312, "logps_train/ref_1_2": -172.0, "logps_train/ref_1_l": -110.0, "logps_train/ref_1_w": -144.0, "logps_train/ref_2_2": -147.0, "logps_train/ref_2_w": -165.0, "rewards_train/1-2": -1.8635177612304688, "rewards_train/1-l": -1.9706084728240967, "rewards_train/1-w": 3.277613401412964, "rewards_train/2-2": 3.2831389904022217, "rewards_train/2-w": -1.1832304000854492, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.2482218742370605, "rewards_train/margins_1": 5.141131162643433, "rewards_train/margins_2": 4.466369390487671, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -193.54940795898438, "logps_train/policy_1_l": -199.42123413085938, "logps_train/policy_1_w": -151.81103515625, "logps_train/policy_2_2": -129.14218139648438, "logps_train/policy_2_w": -208.20758056640625, "logps_train/ref_1_2": -183.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -202.0, "rewards_train/1-2": -1.088923692703247, "rewards_train/1-l": -2.7167317867279053, "rewards_train/1-w": 3.127293825149536, "rewards_train/2-2": 3.4607813358306885, "rewards_train/2-w": -0.6285698413848877, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.844025611877441, "rewards_train/margins_1": 4.216217517852783, "rewards_train/margins_2": 4.089351177215576, "step": 662 }, { "epoch": 1.98, "logps_train/policy_1_2": -202.98544311523438, "logps_train/policy_1_l": -185.82827758789062, "logps_train/policy_1_w": -146.23936462402344, "logps_train/policy_2_2": -126.89039611816406, "logps_train/policy_2_w": -224.16696166992188, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -162.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -157.0, "logps_train/ref_2_w": -207.0, "rewards_train/1-2": -2.2141690254211426, "rewards_train/1-l": -2.3314602375030518, "rewards_train/1-w": 3.426844596862793, "rewards_train/2-2": 2.9961166381835938, "rewards_train/2-w": -1.7463843822479248, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.758304834365845, "rewards_train/margins_1": 5.6410136222839355, "rewards_train/margins_2": 4.7425010204315186, "step": 662 }, { "epoch": 1.99, "logps_train/policy_1_2": -163.35946655273438, "logps_train/policy_1_l": -174.90049743652344, "logps_train/policy_1_w": -118.9688720703125, "logps_train/policy_2_2": -100.08785247802734, "logps_train/policy_2_w": -192.33180236816406, "logps_train/ref_1_2": -147.0, "logps_train/ref_1_l": -140.0, "logps_train/ref_1_w": -147.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -173.0, "rewards_train/1-2": -1.6647560596466064, "rewards_train/1-l": -3.5323591232299805, "rewards_train/1-w": 2.8145389556884766, "rewards_train/2-2": 2.4371135234832764, "rewards_train/2-w": -1.935719609260559, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.875, "rewards_train/margins": 6.346898078918457, "rewards_train/margins_1": 4.479295015335083, "rewards_train/margins_2": 4.3728331327438354, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -187.78115844726562, "logps_train/policy_1_l": -170.1365966796875, "logps_train/policy_1_w": -102.23222351074219, "logps_train/policy_2_2": -127.65076446533203, "logps_train/policy_2_w": -167.55450439453125, "logps_train/ref_1_2": -173.0, "logps_train/ref_1_l": -148.0, "logps_train/ref_1_w": -130.0, "logps_train/ref_2_2": -155.0, "logps_train/ref_2_w": -152.0, "rewards_train/1-2": -1.4591200351715088, "rewards_train/1-l": -2.209362506866455, "rewards_train/1-w": 2.7759974002838135, "rewards_train/2-2": 2.7043325901031494, "rewards_train/2-w": -1.5288889408111572, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.9853599071502686, "rewards_train/margins_1": 4.235117435455322, "rewards_train/margins_2": 4.233221530914307, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -160.03750610351562, "logps_train/policy_1_l": -197.43896484375, "logps_train/policy_1_w": -138.4503173828125, "logps_train/policy_2_2": -104.31730651855469, "logps_train/policy_2_w": -213.83462524414062, "logps_train/ref_1_2": -151.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -171.0, "logps_train/ref_2_2": -132.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -0.925626277923584, "rewards_train/1-l": -2.5034680366516113, "rewards_train/1-w": 3.217857837677002, "rewards_train/2-2": 2.7409253120422363, "rewards_train/2-w": -1.6779930591583252, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.721325874328613, "rewards_train/margins_1": 4.143484115600586, "rewards_train/margins_2": 4.4189183712005615, "step": 663 }, { "epoch": 1.99, "logps_train/policy_1_2": -186.74945068359375, "logps_train/policy_1_l": -158.43763732910156, "logps_train/policy_1_w": -113.34635925292969, "logps_train/policy_2_2": -109.47393798828125, "logps_train/policy_2_w": -197.80641174316406, "logps_train/ref_1_2": -165.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -140.0, "logps_train/ref_2_w": -176.0, "rewards_train/1-2": -2.1499452590942383, "rewards_train/1-l": -1.7269542217254639, "rewards_train/1-w": 3.166926860809326, "rewards_train/2-2": 2.990105628967285, "rewards_train/2-w": -2.150172233581543, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.89388108253479, "rewards_train/margins_1": 5.3168721199035645, "rewards_train/margins_2": 5.140277862548828, "step": 663 }, { "epoch": 1.99, "learning_rate": 1.09790872801413e-09, "loss": 0.4783, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -203.4707794189453, "logps_train/policy_1_l": -236.6838836669922, "logps_train/policy_1_w": -142.97625732421875, "logps_train/policy_2_2": -133.98204040527344, "logps_train/policy_2_w": -213.6945037841797, "logps_train/ref_1_2": -185.0, "logps_train/ref_1_l": -211.0, "logps_train/ref_1_w": -175.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -201.0, "rewards_train/1-2": -1.8494213819503784, "rewards_train/1-l": -2.5666303634643555, "rewards_train/1-w": 3.2219066619873047, "rewards_train/2-2": 2.9732813835144043, "rewards_train/2-w": -1.3346850872039795, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.78853702545166, "rewards_train/margins_1": 5.071328043937683, "rewards_train/margins_2": 4.307966470718384, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -192.54006958007812, "logps_train/policy_1_l": -116.21115112304688, "logps_train/policy_1_w": -92.204345703125, "logps_train/policy_2_2": -119.05024719238281, "logps_train/policy_2_w": -147.32164001464844, "logps_train/ref_1_2": -175.0, "logps_train/ref_1_l": -98.0, "logps_train/ref_1_w": -122.0, "logps_train/ref_2_2": -153.0, "logps_train/ref_2_w": -142.0, "rewards_train/1-2": -1.7739291191101074, "rewards_train/1-l": -1.8213101625442505, "rewards_train/1-w": 2.9512453079223633, "rewards_train/2-2": 3.3992726802825928, "rewards_train/2-w": -0.49954745173454285, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.772555470466614, "rewards_train/margins_1": 4.725174427032471, "rewards_train/margins_2": 3.8988201320171356, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -132.8491668701172, "logps_train/policy_1_l": -165.5685272216797, "logps_train/policy_1_w": -91.57093811035156, "logps_train/policy_2_2": -86.94721984863281, "logps_train/policy_2_w": -148.3299560546875, "logps_train/ref_1_2": -125.0, "logps_train/ref_1_l": -146.0, "logps_train/ref_1_w": -115.5, "logps_train/ref_2_2": -108.5, "logps_train/ref_2_w": -137.0, "rewards_train/1-2": -0.7613812685012817, "rewards_train/1-l": -2.0302906036376953, "rewards_train/1-w": 2.3927102088928223, "rewards_train/2-2": 2.160161256790161, "rewards_train/2-w": -1.1212760210037231, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.423000812530518, "rewards_train/margins_1": 3.154091477394104, "rewards_train/margins_2": 3.2814372777938843, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -189.00433349609375, "logps_train/policy_1_l": -162.95425415039062, "logps_train/policy_1_w": -95.98216247558594, "logps_train/policy_2_2": -104.44973754882812, "logps_train/policy_2_w": -168.3525848388672, "logps_train/ref_1_2": -156.0, "logps_train/ref_1_l": -141.0, "logps_train/ref_1_w": -120.0, "logps_train/ref_2_2": -129.0, "logps_train/ref_2_w": -145.0, "rewards_train/1-2": -3.2816834449768066, "rewards_train/1-l": -2.1840004920959473, "rewards_train/1-w": 2.4416279792785645, "rewards_train/2-2": 2.4357872009277344, "rewards_train/2-w": -2.313969373703003, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.625628471374512, "rewards_train/margins_1": 5.723311424255371, "rewards_train/margins_2": 4.749756574630737, "step": 664 }, { "epoch": 1.99, "logps_train/policy_1_2": -201.68478393554688, "logps_train/policy_1_l": -213.2323760986328, "logps_train/policy_1_w": -141.9951934814453, "logps_train/policy_2_2": -136.3296356201172, "logps_train/policy_2_w": -209.24478149414062, "logps_train/ref_1_2": -194.0, "logps_train/ref_1_l": -186.0, "logps_train/ref_1_w": -178.0, "logps_train/ref_2_2": -171.0, "logps_train/ref_2_w": -203.0, "rewards_train/1-2": -0.7944536805152893, "rewards_train/1-l": -2.673236846923828, "rewards_train/1-w": 3.588371515274048, "rewards_train/2-2": 3.477583408355713, "rewards_train/2-w": -0.555336594581604, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.261608362197876, "rewards_train/margins_1": 4.382825195789337, "rewards_train/margins_2": 4.032920002937317, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -227.86624145507812, "logps_train/policy_1_l": -187.20822143554688, "logps_train/policy_1_w": -159.59300231933594, "logps_train/policy_2_2": -151.59439086914062, "logps_train/policy_2_w": -236.27627563476562, "logps_train/ref_1_2": -211.0, "logps_train/ref_1_l": -169.0, "logps_train/ref_1_w": -193.0, "logps_train/ref_2_2": -186.0, "logps_train/ref_2_w": -222.0, "rewards_train/1-2": -1.6674845218658447, "rewards_train/1-l": -1.805196762084961, "rewards_train/1-w": 3.400074005126953, "rewards_train/2-2": 3.407747268676758, "rewards_train/2-w": -1.4385656118392944, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.205270767211914, "rewards_train/margins_1": 5.067558526992798, "rewards_train/margins_2": 4.846312880516052, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -187.7613067626953, "logps_train/policy_1_l": -194.3363037109375, "logps_train/policy_1_w": -121.85494232177734, "logps_train/policy_2_2": -117.20389556884766, "logps_train/policy_2_w": -207.8697052001953, "logps_train/ref_1_2": -166.0, "logps_train/ref_1_l": -165.0, "logps_train/ref_1_w": -157.0, "logps_train/ref_2_2": -141.0, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -2.115583896636963, "rewards_train/1-l": -2.92230224609375, "rewards_train/1-w": 3.520951271057129, "rewards_train/2-2": 2.4251179695129395, "rewards_train/2-w": -2.5834548473358154, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.443253517150879, "rewards_train/margins_1": 5.636535167694092, "rewards_train/margins_2": 5.008572816848755, "step": 665 }, { "epoch": 1.99, "logps_train/policy_1_2": -200.41534423828125, "logps_train/policy_1_l": -195.515380859375, "logps_train/policy_1_w": -147.68165588378906, "logps_train/policy_2_2": -134.2269287109375, "logps_train/policy_2_w": -221.50997924804688, "logps_train/ref_1_2": -189.0, "logps_train/ref_1_l": -172.0, "logps_train/ref_1_w": -181.0, "logps_train/ref_2_2": -164.0, "logps_train/ref_2_w": -206.0, "rewards_train/1-2": -1.1403617858886719, "rewards_train/1-l": -2.3428475856781006, "rewards_train/1-w": 3.3182358741760254, "rewards_train/2-2": 2.997668981552124, "rewards_train/2-w": -1.5986063480377197, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 5.661083459854126, "rewards_train/margins_1": 4.458597660064697, "rewards_train/margins_2": 4.596275329589844, "step": 665 }, { "epoch": 1.99, "learning_rate": 4.87979278772921e-10, "loss": 0.4673, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -228.14810180664062, "logps_train/policy_1_l": -94.39736938476562, "logps_train/policy_1_w": -83.88475799560547, "logps_train/policy_2_2": -149.69430541992188, "logps_train/policy_2_w": -136.22213745117188, "logps_train/ref_1_2": -205.0, "logps_train/ref_1_l": -83.0, "logps_train/ref_1_w": -105.5, "logps_train/ref_2_2": -183.0, "logps_train/ref_2_w": -125.5, "rewards_train/1-2": -2.386294364929199, "rewards_train/1-l": -1.1210846900939941, "rewards_train/1-w": 2.18735408782959, "rewards_train/2-2": 3.380960702896118, "rewards_train/2-w": -1.06654953956604, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 3.308438777923584, "rewards_train/margins_1": 4.573648452758789, "rewards_train/margins_2": 4.447510242462158, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -158.4575958251953, "logps_train/policy_1_l": -151.39952087402344, "logps_train/policy_1_w": -117.08435821533203, "logps_train/policy_2_2": -102.3787841796875, "logps_train/policy_2_w": -193.44215393066406, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -134.0, "logps_train/ref_1_w": -145.0, "logps_train/ref_2_2": -126.5, "logps_train/ref_2_w": -172.0, "rewards_train/1-2": -1.495955228805542, "rewards_train/1-l": -1.7202261686325073, "rewards_train/1-w": 2.8052358627319336, "rewards_train/2-2": 2.411633253097534, "rewards_train/2-w": -2.1444106101989746, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.525462031364441, "rewards_train/margins_1": 4.301191091537476, "rewards_train/margins_2": 4.556043863296509, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -203.6702880859375, "logps_train/policy_1_l": -195.12313842773438, "logps_train/policy_1_w": -138.1669158935547, "logps_train/policy_2_2": -131.65626525878906, "logps_train/policy_2_w": -214.97317504882812, "logps_train/ref_1_2": -186.0, "logps_train/ref_1_l": -176.0, "logps_train/ref_1_w": -167.0, "logps_train/ref_2_2": -162.0, "logps_train/ref_2_w": -197.0, "rewards_train/1-2": -1.7295295000076294, "rewards_train/1-l": -1.9043065309524536, "rewards_train/1-w": 2.910261869430542, "rewards_train/2-2": 3.113476037979126, "rewards_train/2-w": -1.7828649282455444, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.814568400382996, "rewards_train/margins_1": 4.639791369438171, "rewards_train/margins_2": 4.89634096622467, "step": 666 }, { "epoch": 1.99, "logps_train/policy_1_2": -154.02789306640625, "logps_train/policy_1_l": -131.3188018798828, "logps_train/policy_1_w": -90.67568969726562, "logps_train/policy_2_2": -98.36347961425781, "logps_train/policy_2_w": -161.56253051757812, "logps_train/ref_1_2": -139.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -118.0, "logps_train/ref_2_2": -120.5, "logps_train/ref_2_w": -140.0, "rewards_train/1-2": -1.5293529033660889, "rewards_train/1-l": -1.73764169216156, "rewards_train/1-w": 2.769930601119995, "rewards_train/2-2": 2.214433193206787, "rewards_train/2-w": -2.155473232269287, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.507572293281555, "rewards_train/margins_1": 4.299283504486084, "rewards_train/margins_2": 4.369906425476074, "step": 666 }, { "epoch": 2.0, "logps_train/policy_1_2": -151.81582641601562, "logps_train/policy_1_l": -139.27365112304688, "logps_train/policy_1_w": -98.09596252441406, "logps_train/policy_2_2": -96.10813903808594, "logps_train/policy_2_w": -157.97618103027344, "logps_train/ref_1_2": -140.0, "logps_train/ref_1_l": -119.0, "logps_train/ref_1_w": -124.0, "logps_train/ref_2_2": -119.5, "logps_train/ref_2_w": -144.0, "rewards_train/1-2": -1.1331450939178467, "rewards_train/1-l": -2.052072048187256, "rewards_train/1-w": 2.5685291290283203, "rewards_train/2-2": 2.3083267211914062, "rewards_train/2-w": -1.387852668762207, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 4.620601177215576, "rewards_train/margins_1": 3.701674222946167, "rewards_train/margins_2": 3.6961793899536133, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -145.98817443847656, "logps_train/policy_1_l": -137.731201171875, "logps_train/policy_1_w": -96.94158935546875, "logps_train/policy_2_2": -86.72977447509766, "logps_train/policy_2_w": -157.5399169921875, "logps_train/ref_1_2": -129.0, "logps_train/ref_1_l": -121.5, "logps_train/ref_1_w": -120.5, "logps_train/ref_2_2": -108.0, "logps_train/ref_2_w": -141.0, "rewards_train/1-2": -1.7255746126174927, "rewards_train/1-l": -1.6355226039886475, "rewards_train/1-w": 2.37788724899292, "rewards_train/2-2": 2.1311240196228027, "rewards_train/2-w": -1.6236696243286133, "rewards_train/accuracies": 0.9375, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.013409852981567, "rewards_train/margins_1": 4.103461861610413, "rewards_train/margins_2": 3.754793643951416, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -195.89413452148438, "logps_train/policy_1_l": -227.92153930664062, "logps_train/policy_1_w": -147.92520141601562, "logps_train/policy_2_2": -119.82999420166016, "logps_train/policy_2_w": -238.19329833984375, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -195.0, "logps_train/ref_1_w": -183.0, "logps_train/ref_2_2": -150.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -2.0133886337280273, "rewards_train/1-l": -3.2747724056243896, "rewards_train/1-w": 3.5254480838775635, "rewards_train/2-2": 2.9834065437316895, "rewards_train/2-w": -2.4622983932495117, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.800220489501953, "rewards_train/margins_1": 5.538836717605591, "rewards_train/margins_2": 5.445704936981201, "step": 667 }, { "epoch": 2.0, "logps_train/policy_1_2": -235.60671997070312, "logps_train/policy_1_l": -225.74542236328125, "logps_train/policy_1_w": -155.78607177734375, "logps_train/policy_2_2": -155.12197875976562, "logps_train/policy_2_w": -221.94598388671875, "logps_train/ref_1_2": -221.0, "logps_train/ref_1_l": -192.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -194.0, "logps_train/ref_2_w": -212.0, "rewards_train/1-2": -1.4700477123260498, "rewards_train/1-l": -3.3473939895629883, "rewards_train/1-w": 3.3079164028167725, "rewards_train/2-2": 3.865927219390869, "rewards_train/2-w": -1.0524122714996338, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 6.655310392379761, "rewards_train/margins_1": 4.777964115142822, "rewards_train/margins_2": 4.918339490890503, "step": 667 }, { "epoch": 2.0, "learning_rate": 1.2199779638566444e-10, "loss": 0.5321, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -233.90582275390625, "logps_train/policy_1_l": -147.3253936767578, "logps_train/policy_1_w": -112.5025863647461, "logps_train/policy_2_2": -144.44175720214844, "logps_train/policy_2_w": -191.56663513183594, "logps_train/ref_1_2": -207.0, "logps_train/ref_1_l": -124.0, "logps_train/ref_1_w": -143.0, "logps_train/ref_2_2": -180.0, "logps_train/ref_2_w": -174.0, "rewards_train/1-2": -2.6612861156463623, "rewards_train/1-l": -2.351680278778076, "rewards_train/1-w": 3.0618505477905273, "rewards_train/2-2": 3.479261875152588, "rewards_train/2-w": -1.7105696201324463, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.4135308265686035, "rewards_train/margins_1": 5.72313666343689, "rewards_train/margins_2": 5.189831495285034, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -153.06942749023438, "logps_train/policy_1_l": -213.72406005859375, "logps_train/policy_1_w": -136.7153778076172, "logps_train/policy_2_2": -101.3427963256836, "logps_train/policy_2_w": -200.9437255859375, "logps_train/ref_1_2": -141.0, "logps_train/ref_1_l": -184.0, "logps_train/ref_1_w": -166.0, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -186.0, "rewards_train/1-2": -1.2194430828094482, "rewards_train/1-l": -2.9194765090942383, "rewards_train/1-w": 2.951215982437134, "rewards_train/2-2": 2.177536725997925, "rewards_train/2-w": -1.4865598678588867, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.870692491531372, "rewards_train/margins_1": 4.170659065246582, "rewards_train/margins_2": 3.6640965938568115, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -123.49601745605469, "logps_train/policy_1_l": -109.50711059570312, "logps_train/policy_1_w": -63.048675537109375, "logps_train/policy_2_2": -62.23661804199219, "logps_train/policy_2_w": -134.8328399658203, "logps_train/ref_1_2": -103.5, "logps_train/ref_1_l": -85.5, "logps_train/ref_1_w": -86.0, "logps_train/ref_2_2": -84.5, "logps_train/ref_2_w": -114.0, "rewards_train/1-2": -2.0019450187683105, "rewards_train/1-l": -2.3768839836120605, "rewards_train/1-w": 2.315835952758789, "rewards_train/2-2": 2.205244302749634, "rewards_train/2-w": -2.0727367401123047, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 0.9375, "rewards_train/margins": 4.69271993637085, "rewards_train/margins_1": 4.3177809715271, "rewards_train/margins_2": 4.2779810428619385, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -156.78515625, "logps_train/policy_1_l": -178.8747100830078, "logps_train/policy_1_w": -129.9874725341797, "logps_train/policy_2_2": -98.84098815917969, "logps_train/policy_2_w": -193.99464416503906, "logps_train/ref_1_2": -143.0, "logps_train/ref_1_l": -154.0, "logps_train/ref_1_w": -161.0, "logps_train/ref_2_2": -124.5, "logps_train/ref_2_w": -182.0, "rewards_train/1-2": -1.3462893962860107, "rewards_train/1-l": -2.530733108520508, "rewards_train/1-w": 3.0614092350006104, "rewards_train/2-2": 2.5664868354797363, "rewards_train/2-w": -1.2502458095550537, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.9375, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.592142343521118, "rewards_train/margins_1": 4.407698631286621, "rewards_train/margins_2": 3.81673264503479, "step": 668 }, { "epoch": 2.0, "logps_train/policy_1_2": -191.1619110107422, "logps_train/policy_1_l": -217.16348266601562, "logps_train/policy_1_w": -151.99826049804688, "logps_train/policy_2_2": -131.29794311523438, "logps_train/policy_2_w": -227.50770568847656, "logps_train/ref_1_2": -181.0, "logps_train/ref_1_l": -189.0, "logps_train/ref_1_w": -189.0, "logps_train/ref_2_2": -163.0, "logps_train/ref_2_w": -214.0, "rewards_train/1-2": -0.970097541809082, "rewards_train/1-l": -2.798281669616699, "rewards_train/1-w": 3.6496853828430176, "rewards_train/2-2": 3.1436424255371094, "rewards_train/2-w": -1.401747703552246, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 6.447967052459717, "rewards_train/margins_1": 4.6197829246521, "rewards_train/margins_2": 4.5453901290893555, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -148.3190460205078, "logps_train/policy_1_l": -134.2628173828125, "logps_train/policy_1_w": -104.65802764892578, "logps_train/policy_2_2": -88.65853118896484, "logps_train/policy_2_w": -184.4242401123047, "logps_train/ref_1_2": -131.0, "logps_train/ref_1_l": -114.0, "logps_train/ref_1_w": -137.0, "logps_train/ref_2_2": -114.0, "logps_train/ref_2_w": -161.0, "rewards_train/1-2": -1.7696000337600708, "rewards_train/1-l": -2.0045528411865234, "rewards_train/1-w": 3.2088065147399902, "rewards_train/2-2": 2.5283851623535156, "rewards_train/2-w": -2.2803142070770264, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.213359355926514, "rewards_train/margins_1": 4.978406548500061, "rewards_train/margins_2": 4.808699369430542, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -172.2886199951172, "logps_train/policy_1_l": -158.56398010253906, "logps_train/policy_1_w": -87.41532135009766, "logps_train/policy_2_2": -92.24039459228516, "logps_train/policy_2_w": -168.10755920410156, "logps_train/ref_1_2": -150.0, "logps_train/ref_1_l": -133.0, "logps_train/ref_1_w": -117.5, "logps_train/ref_2_2": -123.0, "logps_train/ref_2_w": -148.0, "rewards_train/1-2": -2.223393440246582, "rewards_train/1-l": -2.5931174755096436, "rewards_train/1-w": 2.9959678649902344, "rewards_train/2-2": 3.096663236618042, "rewards_train/2-w": -2.017005681991577, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 1.0, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.589085340499878, "rewards_train/margins_1": 5.219361305236816, "rewards_train/margins_2": 5.113668918609619, "step": 669 }, { "epoch": 2.0, "logps_train/policy_1_2": -181.55091857910156, "logps_train/policy_1_l": -182.03050231933594, "logps_train/policy_1_w": -110.37123107910156, "logps_train/policy_2_2": -129.6824493408203, "logps_train/policy_2_w": -163.61788940429688, "logps_train/ref_1_2": -176.0, "logps_train/ref_1_l": -160.0, "logps_train/ref_1_w": -140.0, "logps_train/ref_2_2": -160.0, "logps_train/ref_2_w": -157.0, "rewards_train/1-2": -0.6011865735054016, "rewards_train/1-l": -2.1968986988067627, "rewards_train/1-w": 2.9884629249572754, "rewards_train/2-2": 3.042888641357422, "rewards_train/2-w": -0.6508522033691406, "rewards_train/accuracies": 1.0, "rewards_train/accuracies_1": 0.875, "rewards_train/accuracies_2": 1.0, "rewards_train/margins": 5.185361623764038, "rewards_train/margins_1": 3.589649498462677, "rewards_train/margins_2": 3.6937408447265625, "step": 669 }, { "epoch": 2.01, "learning_rate": 0.0, "loss": 0.4958, "step": 670 } ], "logging_steps": 2, "max_steps": 670, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 335, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }