{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 12032, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008311170212765958, "grad_norm": 401.13100015843634, "learning_rate": 3.3195020746887967e-09, "loss": 1.3266, "step": 5, "success_rate.epoch.env.logic": 0.0, "success_rate.epoch.env.science": 0.75, "success_rate.epoch.env_macro_mean": 0.375, "success_rate.epoch.global": 0.6, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9984581497797357, "tokens_p.mean_in_band": 0.5880748820754716, "tokens_rate.above_band": 0.9553872053872053, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04461279461279461 }, { "epoch": 0.0016622340425531915, "grad_norm": 187.50685532828885, "learning_rate": 7.468879668049792e-09, "loss": 1.3018, "step": 10, "success_rate.epoch.env.logic": 0.0, "success_rate.epoch.env.science": 0.5, "success_rate.epoch.env_macro_mean": 0.25, "success_rate.epoch.global": 0.4, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.125, "success_rate.window.global": 0.2, "tokens_p.mean_above_band": 0.9980440320232896, "tokens_p.mean_below_band": 8.585629984736443e-10, "tokens_p.mean_in_band": 0.5657913773148148, "tokens_rate.above_band": 0.9258760107816711, "tokens_rate.below_band": 0.0013477088948787063, "tokens_rate.in_band": 0.07277628032345014 }, { "epoch": 0.0024933510638297874, "grad_norm": 2423.9894087591297, "learning_rate": 1.1618257261410788e-08, "loss": 1.184, "step": 15, "success_rate.epoch.env.logic": 0.2, "success_rate.epoch.env.science": 0.52, "success_rate.epoch.env_macro_mean": 0.36, "success_rate.epoch.global": 0.4666666666666667, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.5555555555555556, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.99170197740113, "tokens_p.mean_below_band": 8.585629984736443e-10, "tokens_p.mean_in_band": 0.6497112771739131, "tokens_rate.above_band": 0.8805970149253731, "tokens_rate.below_band": 0.004975124378109453, "tokens_rate.in_band": 0.11442786069651742 }, { "epoch": 0.003324468085106383, "grad_norm": 238.72394610631142, "learning_rate": 1.5767634854771783e-08, "loss": 1.1159, "step": 20, "success_rate.epoch.env.logic": 0.2, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.0, "success_rate.epoch.env.science": 0.5625, "success_rate.epoch.env_macro_mean": 0.440625, "success_rate.epoch.global": 0.525, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9904801324503312, "tokens_p.mean_in_band": 0.6736010174418605, "tokens_rate.above_band": 0.7783505154639175, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.22164948453608246 }, { "epoch": 0.004155585106382979, "grad_norm": 400.3309500951242, "learning_rate": 1.991701244813278e-08, "loss": 1.5418, "step": 25, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.16666666666666666, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.0, "success_rate.epoch.env.science": 0.5, "success_rate.epoch.env_macro_mean": 0.5333333333333333, "success_rate.epoch.global": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.16666666666666666, "success_rate.window.env_macro_mean": 0.5416666666666667, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9953038674033149, "tokens_p.mean_in_band": 0.5205592105263158, "tokens_rate.above_band": 0.9407484407484408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.059251559251559255 }, { "epoch": 0.004986702127659575, "grad_norm": 319.49929599700613, "learning_rate": 2.4066390041493776e-08, "loss": 1.2687, "step": 30, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.16666666666666666, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.0, "success_rate.epoch.env.science": 0.5555555555555556, "success_rate.epoch.env_macro_mean": 0.47777777777777775, "success_rate.epoch.global": 0.5333333333333333, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9868908898305084, "tokens_p.mean_in_band": 0.6214446107784432, "tokens_rate.above_band": 0.7386541471048513, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.26134585289514867 }, { "epoch": 0.00581781914893617, "grad_norm": 228.4919815949419, "learning_rate": 2.821576763485477e-08, "loss": 1.3086, "step": 35, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.14285714285714285, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.0, "success_rate.epoch.env.science": 0.5686274509803921, "success_rate.epoch.env_macro_mean": 0.5769140989729226, "success_rate.epoch.global": 0.5571428571428572, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9984144542772861, "tokens_p.mean_in_band": 0.5356794084821429, "tokens_rate.above_band": 0.9680182752712736, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03198172472872644 }, { "epoch": 0.006648936170212766, "grad_norm": 308.61285894032443, "learning_rate": 3.2365145228215765e-08, "loss": 1.5794, "step": 40, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.125, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.5789473684210527, "success_rate.epoch.env_macro_mean": 0.6838972431077694, "success_rate.epoch.global": 0.575, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973583633093526, "tokens_p.mean_in_band": 0.6287878787878788, "tokens_rate.above_band": 0.9439728353140917, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05602716468590832 }, { "epoch": 0.007480053191489362, "grad_norm": 566.7855176847529, "learning_rate": 3.651452282157676e-08, "loss": 1.4112, "step": 45, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.1111111111111111, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.59375, "success_rate.epoch.env_macro_mean": 0.6364087301587301, "success_rate.epoch.global": 0.5777777777777777, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9972698252688172, "tokens_p.mean_in_band": 0.6447531960227273, "tokens_rate.above_band": 0.9441624365482234, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05583756345177665 }, { "epoch": 0.008311170212765957, "grad_norm": 499.637516240341, "learning_rate": 4.066390041493776e-08, "loss": 1.329, "step": 50, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.1111111111111111, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6231884057971014, "success_rate.epoch.env_macro_mean": 0.6406142167011731, "success_rate.epoch.global": 0.6161616161616161, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9166666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9896941489361702, "tokens_p.mean_in_band": 0.6927490234375, "tokens_rate.above_band": 0.746031746031746, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.25396825396825395 }, { "epoch": 0.009142287234042553, "grad_norm": 163.52332864641429, "learning_rate": 4.4813278008298754e-08, "loss": 1.2468, "step": 55, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.1, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.6103896103896104, "success_rate.epoch.env_macro_mean": 0.6371985157699444, "success_rate.epoch.global": 0.6055045871559633, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9889583333333334, "tokens_p.mean_in_band": 0.5577256944444444, "tokens_rate.above_band": 0.7352941176470589, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2647058823529412 }, { "epoch": 0.00997340425531915, "grad_norm": 397.74951402324774, "learning_rate": 4.8962655601659744e-08, "loss": 1.2968, "step": 60, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.18181818181818182, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.6024096385542169, "success_rate.epoch.env_macro_mean": 0.6286992124341522, "success_rate.epoch.global": 0.5966386554621849, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9947509765625, "tokens_p.mean_in_band": 0.705577761627907, "tokens_rate.above_band": 0.8561872909698997, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14381270903010032 }, { "epoch": 0.010804521276595746, "grad_norm": 293.6381642270671, "learning_rate": 5.3112033195020747e-08, "loss": 1.2173, "step": 65, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.8, "success_rate.epoch.env.logic": 0.18181818181818182, "success_rate.epoch.env.math": 0.9285714285714286, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.6, "success_rate.epoch.env_macro_mean": 0.6252937538651823, "success_rate.epoch.global": 0.5968992248062015, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6904761904761904, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9917561349693251, "tokens_p.mean_in_band": 0.627734375, "tokens_rate.above_band": 0.844559585492228, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15544041450777202 }, { "epoch": 0.01163563829787234, "grad_norm": 461.24478905392465, "learning_rate": 5.7261410788381736e-08, "loss": 1.2202, "step": 70, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.8, "success_rate.epoch.env.logic": 0.15384615384615385, "success_rate.epoch.env.math": 0.9333333333333333, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.5979381443298969, "success_rate.epoch.env_macro_mean": 0.6216834711680074, "success_rate.epoch.global": 0.5899280575539568, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.5238095238095238, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9981339126559715, "tokens_p.mean_in_band": 0.4951171875, "tokens_rate.above_band": 0.9342214820982515, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06577851790174855 }, { "epoch": 0.012466755319148936, "grad_norm": 547.4042947917907, "learning_rate": 6.141078838174275e-08, "loss": 1.506, "step": 75, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.8, "success_rate.epoch.env.logic": 0.13333333333333333, "success_rate.epoch.env.math": 0.9333333333333333, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.6153846153846154, "success_rate.epoch.env_macro_mean": 0.6164835164835166, "success_rate.epoch.global": 0.5906040268456376, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.2857142857142857, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.996728650137741, "tokens_p.mean_in_band": 0.6344747340425532, "tokens_rate.above_band": 0.9391979301423027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06080206985769728 }, { "epoch": 0.013297872340425532, "grad_norm": 157.51357010421583, "learning_rate": 6.556016597510374e-08, "loss": 1.0704, "step": 80, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.13333333333333333, "success_rate.epoch.env.math": 0.9444444444444444, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.5925925925925926, "success_rate.epoch.env_macro_mean": 0.592365835222978, "success_rate.epoch.global": 0.5759493670886076, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9880798969072165, "tokens_p.mean_in_band": 0.6870028409090909, "tokens_rate.above_band": 0.8605830164765526, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1394169835234474 }, { "epoch": 0.014128989361702128, "grad_norm": 561.2238683461217, "learning_rate": 6.970954356846473e-08, "loss": 1.3092, "step": 85, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.1111111111111111, "success_rate.epoch.env.math": 0.8947368421052632, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6017699115044248, "success_rate.epoch.env_macro_mean": 0.5595916677492297, "success_rate.epoch.global": 0.5654761904761905, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.2, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9983031674208145, "tokens_p.mean_in_band": 0.5675838694852942, "tokens_rate.above_band": 0.9629629629629629, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037037037037037035 }, { "epoch": 0.014960106382978724, "grad_norm": 280.93420534873474, "learning_rate": 7.385892116182573e-08, "loss": 1.2831, "step": 90, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.6, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.15789473684210525, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6101694915254238, "success_rate.epoch.env_macro_mean": 0.5096985047364173, "success_rate.epoch.global": 0.5730337078651685, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.99908447265625, "tokens_p.mean_below_band": 5.617039278149605e-09, "tokens_p.mean_in_band": 0.2380484271523179, "tokens_rate.above_band": 0.8711186729051468, "tokens_rate.below_band": 0.0004253509145044662, "tokens_rate.in_band": 0.1284559761803488 }, { "epoch": 0.01579122340425532, "grad_norm": 351.01882732407955, "learning_rate": 7.800829875518672e-08, "loss": 1.6298, "step": 95, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.6, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.15789473684210525, "success_rate.epoch.env.math": 0.9090909090909091, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6111111111111112, "success_rate.epoch.env_macro_mean": 0.5109525708209919, "success_rate.epoch.global": 0.5797872340425532, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9855587121212122, "tokens_p.mean_in_band": 0.6205800374348959, "tokens_rate.above_band": 0.88, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12 }, { "epoch": 0.016622340425531915, "grad_norm": 402.86886331260774, "learning_rate": 8.215767634854771e-08, "loss": 1.5518, "step": 100, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.6, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.15, "success_rate.epoch.env.math": 0.9090909090909091, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6194029850746269, "success_rate.epoch.env_macro_mean": 0.4990974510564063, "success_rate.epoch.global": 0.5808080808080808, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9950442950490073, "tokens_p.mean_in_band": 0.5289463141025641, "tokens_rate.above_band": 0.9026769509981851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09732304900181488 }, { "epoch": 0.01745345744680851, "grad_norm": 452.85894919436043, "learning_rate": 8.630705394190871e-08, "loss": 1.1647, "step": 105, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.6, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.19047619047619047, "success_rate.epoch.env.math": 0.9166666666666666, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6, "success_rate.epoch.env_macro_mean": 0.49374999999999997, "success_rate.epoch.global": 0.5721153846153846, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.16666666666666666, "success_rate.window.env_macro_mean": 0.5416666666666667, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9875739644970414, "tokens_p.mean_in_band": 0.6503106725146199, "tokens_rate.above_band": 0.8316929133858267, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16830708661417323 }, { "epoch": 0.018284574468085107, "grad_norm": 214.39100868957553, "learning_rate": 9.04564315352697e-08, "loss": 1.2129, "step": 110, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.6, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.18181818181818182, "success_rate.epoch.env.math": 0.92, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.6095890410958904, "success_rate.epoch.env_macro_mean": 0.4942830457214019, "success_rate.epoch.global": 0.5779816513761468, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9930876655836041, "tokens_p.mean_in_band": 0.5333141321044547, "tokens_rate.above_band": 0.8600601891659502, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13993981083404988 }, { "epoch": 0.0191156914893617, "grad_norm": 622.4085970133178, "learning_rate": 9.46058091286307e-08, "loss": 1.5336, "step": 115, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.6, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.21739130434782608, "success_rate.epoch.env.math": 0.92, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.6103896103896104, "success_rate.epoch.env_macro_mean": 0.4965976143421796, "success_rate.epoch.global": 0.5789473684210527, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.990244708994709, "tokens_p.mean_in_band": 0.634046052631579, "tokens_rate.above_band": 0.8325991189427313, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16740088105726872 }, { "epoch": 0.0199468085106383, "grad_norm": 221.59897774532945, "learning_rate": 9.87551867219917e-08, "loss": 1.241, "step": 120, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.6, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.21739130434782608, "success_rate.epoch.env.math": 0.92, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.5975609756097561, "success_rate.epoch.env_macro_mean": 0.4949940349946978, "success_rate.epoch.global": 0.5714285714285714, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9883130081300813, "tokens_p.mean_below_band": 3.688037395477295e-07, "tokens_p.mean_in_band": 0.5776154891304348, "tokens_rate.above_band": 0.8367346938775511, "tokens_rate.below_band": 0.006802721088435374, "tokens_rate.in_band": 0.1564625850340136 }, { "epoch": 0.020777925531914893, "grad_norm": 180.77640150723752, "learning_rate": 1.0290456431535268e-07, "loss": 1.2421, "step": 125, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.6, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.25, "success_rate.epoch.env.math": 0.9230769230769231, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.5882352941176471, "success_rate.epoch.env_macro_mean": 0.4951640271493213, "success_rate.epoch.global": 0.5645161290322581, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9948089700996677, "tokens_p.mean_in_band": 0.5564692982456141, "tokens_rate.above_band": 0.840782122905028, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15921787709497207 }, { "epoch": 0.02160904255319149, "grad_norm": 335.97680274772983, "learning_rate": 1.0705394190871369e-07, "loss": 1.2298, "step": 130, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5714285714285714, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.24, "success_rate.epoch.env.math": 0.9230769230769231, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.5795454545454546, "success_rate.epoch.env_macro_mean": 0.4892563686313687, "success_rate.epoch.global": 0.556420233463035, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.2857142857142857, "success_rate.window.env_macro_mean": 0.2619047619047619, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9968717750257998, "tokens_p.mean_in_band": 0.58953857421875, "tokens_rate.above_band": 0.968031968031968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03196803196803197 }, { "epoch": 0.022440159574468085, "grad_norm": 667.7031214506737, "learning_rate": 1.1120331950207469e-07, "loss": 1.4165, "step": 135, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.2692307692307692, "success_rate.epoch.env.math": 0.9230769230769231, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.5769230769230769, "success_rate.epoch.env_macro_mean": 0.4981424825174825, "success_rate.epoch.global": 0.556390977443609, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9942434210526315, "tokens_p.mean_in_band": 0.6302400914634146, "tokens_rate.above_band": 0.9026128266033254, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09738717339667459 }, { "epoch": 0.02327127659574468, "grad_norm": 631.8988811708475, "learning_rate": 1.1535269709543567e-07, "loss": 1.4654, "step": 140, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.25, "success_rate.epoch.env.math": 0.9259259259259259, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.5873015873015873, "success_rate.epoch.env_macro_mean": 0.49739207551707554, "success_rate.epoch.global": 0.5615942028985508, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9972662141779789, "tokens_p.mean_in_band": 0.5901772103658537, "tokens_rate.above_band": 0.9417613636363636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05823863636363636 }, { "epoch": 0.024102393617021278, "grad_norm": 215.22841132298794, "learning_rate": 1.1950207468879667e-07, "loss": 1.1381, "step": 145, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.25, "success_rate.epoch.env.math": 0.9285714285714286, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.5757575757575758, "success_rate.epoch.env_macro_mean": 0.4962797619047619, "success_rate.epoch.global": 0.5559440559440559, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9858630952380952, "tokens_p.mean_in_band": 0.6131184895833334, "tokens_rate.above_band": 0.7241379310344828, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.27586206896551724 }, { "epoch": 0.024933510638297872, "grad_norm": 551.1213200679001, "learning_rate": 1.2365145228215768e-07, "loss": 1.1888, "step": 150, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.2413793103448276, "success_rate.epoch.env.math": 0.9333333333333333, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.5882352941176471, "success_rate.epoch.env_macro_mean": 0.4973571285881124, "success_rate.epoch.global": 0.5661016949152542, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9991122159090909, "tokens_p.mean_in_band": 0.5625500801282052, "tokens_rate.above_band": 0.9475100942126514, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.052489905787348586 }, { "epoch": 0.02576462765957447, "grad_norm": 195.0533745906051, "learning_rate": 1.2780082987551865e-07, "loss": 1.2998, "step": 155, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.23333333333333334, "success_rate.epoch.env.math": 0.9333333333333333, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.5924170616113744, "success_rate.epoch.env_macro_mean": 0.50160895088324, "success_rate.epoch.global": 0.5672131147540984, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9867738589211619, "tokens_p.mean_in_band": 0.6559836647727273, "tokens_rate.above_band": 0.8456140350877193, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1543859649122807 }, { "epoch": 0.026595744680851064, "grad_norm": 460.42342424184585, "learning_rate": 1.3195020746887966e-07, "loss": 1.2685, "step": 160, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.23333333333333334, "success_rate.epoch.env.math": 0.9090909090909091, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.5898617511520737, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.554008208040466, "success_rate.epoch.global": 0.5682539682539682, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.5, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9964561855670103, "tokens_p.mean_in_band": 0.6408305921052632, "tokens_rate.above_band": 0.9387096774193548, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06129032258064516 }, { "epoch": 0.02742686170212766, "grad_norm": 414.22033111665974, "learning_rate": 1.3609958506224065e-07, "loss": 1.5084, "step": 165, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.25806451612903225, "success_rate.epoch.env.math": 0.9117647058823529, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.5829596412556054, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5562863046727632, "success_rate.epoch.global": 0.5648148148148148, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.2857142857142857, "success_rate.window.env_macro_mean": 0.5714285714285714, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.991386217948718, "tokens_p.mean_in_band": 0.46859019886363634, "tokens_rate.above_band": 0.5864661654135338, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.41353383458646614 }, { "epoch": 0.028257978723404256, "grad_norm": 430.396377901239, "learning_rate": 1.4024896265560164e-07, "loss": 1.257, "step": 170, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.25, "success_rate.epoch.env.math": 0.9142857142857143, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.5851528384279476, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.555201797232365, "success_rate.epoch.global": 0.5645645645645646, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.39285714285714285, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9998577389984825, "tokens_p.mean_in_band": 0.5501185825892857, "tokens_rate.above_band": 0.9216783216783216, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07832167832167833 }, { "epoch": 0.02908909574468085, "grad_norm": 187.00444692959334, "learning_rate": 1.4439834024896266e-07, "loss": 1.3097, "step": 175, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.24242424242424243, "success_rate.epoch.env.math": 0.9142857142857143, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.5864978902953587, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5538989959876035, "success_rate.epoch.global": 0.5626822157434402, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.20833333333333334, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9946296296296296, "tokens_p.mean_below_band": 1.6391277313232422e-07, "tokens_p.mean_in_band": 0.5329137731481481, "tokens_rate.above_band": 0.9246575342465754, "tokens_rate.below_band": 0.0013698630136986301, "tokens_rate.in_band": 0.07397260273972603 }, { "epoch": 0.02992021276595745, "grad_norm": 573.263562105385, "learning_rate": 1.4854771784232365e-07, "loss": 1.4987, "step": 180, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.2571428571428571, "success_rate.epoch.env.math": 0.9166666666666666, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.5819672131147541, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5552955393119328, "success_rate.epoch.global": 0.5609065155807366, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.6428571428571429, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9948074018126888, "tokens_p.mean_in_band": 0.6211734693877551, "tokens_rate.above_band": 0.9310829817158931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06891701828410689 }, { "epoch": 0.030751329787234043, "grad_norm": 426.9656445191381, "learning_rate": 1.5269709543568464e-07, "loss": 1.4918, "step": 185, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.24324324324324326, "success_rate.epoch.env.math": 0.9166666666666666, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.588, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5467053980387314, "success_rate.epoch.global": 0.5607734806629834, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.2857142857142857, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9993459302325581, "tokens_p.mean_in_band": 0.6091392780172413, "tokens_rate.above_band": 0.956973293768546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04302670623145401 }, { "epoch": 0.03158244680851064, "grad_norm": 593.9932514366184, "learning_rate": 1.5684647302904563e-07, "loss": 1.2742, "step": 190, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.23076923076923078, "success_rate.epoch.env.math": 0.9166666666666666, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.5836575875486382, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5443078058512559, "success_rate.epoch.global": 0.553763440860215, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.14285714285714285, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9953314266487214, "tokens_p.mean_in_band": 0.5749399038461539, "tokens_rate.above_band": 0.9195544554455446, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08044554455445545 }, { "epoch": 0.03241356382978723, "grad_norm": 237.91590431618616, "learning_rate": 1.6099585062240665e-07, "loss": 1.3347, "step": 195, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.21951219512195122, "success_rate.epoch.env.math": 0.9166666666666666, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.5795454545454546, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5426001204456489, "success_rate.epoch.global": 0.5485564304461942, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9949776785714286, "tokens_p.mean_below_band": 8.585629984736443e-10, "tokens_p.mean_in_band": 0.5992647058823529, "tokens_rate.above_band": 0.9134253450439147, "tokens_rate.below_band": 0.0012547051442910915, "tokens_rate.in_band": 0.08531994981179424 }, { "epoch": 0.03324468085106383, "grad_norm": 344.97766277721, "learning_rate": 1.651452282157676e-07, "loss": 1.3244, "step": 200, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.23809523809523808, "success_rate.epoch.env.math": 0.9166666666666666, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.5845588235294118, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5447589810334909, "success_rate.epoch.global": 0.5524296675191815, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9910102739726028, "tokens_p.mean_in_band": 0.7010789903856459, "tokens_rate.above_band": 0.8390804597701149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16091954022988506 }, { "epoch": 0.03407579787234043, "grad_norm": 285.6487390063705, "learning_rate": 1.6929460580912862e-07, "loss": 1.1814, "step": 205, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.25, "success_rate.epoch.env.math": 0.9166666666666666, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.5827338129496403, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5458789534029341, "success_rate.epoch.global": 0.5513784461152882, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9866513437057991, "tokens_p.mean_in_band": 0.7105287063953488, "tokens_rate.above_band": 0.8915510718789408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10844892812105927 }, { "epoch": 0.03490691489361702, "grad_norm": 418.8657321104007, "learning_rate": 1.7344398340248961e-07, "loss": 1.2693, "step": 210, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.25, "success_rate.epoch.env.math": 0.918918918918919, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.5824561403508772, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5456898544101846, "success_rate.epoch.global": 0.5514705882352942, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9955065359477124, "tokens_p.mean_in_band": 0.5977038871951219, "tokens_rate.above_band": 0.8818443804034583, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11815561959654179 }, { "epoch": 0.035738031914893616, "grad_norm": 287.1345069856305, "learning_rate": 1.775933609958506e-07, "loss": 1.2884, "step": 215, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.2391304347826087, "success_rate.epoch.env.math": 0.9230769230769231, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.584192439862543, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5451370475715489, "success_rate.epoch.global": 0.5526315789473685, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9946584791386271, "tokens_p.mean_in_band": 0.6318359375, "tokens_rate.above_band": 0.925280199252802, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.074719800747198 }, { "epoch": 0.036569148936170214, "grad_norm": 386.6212346601495, "learning_rate": 1.8174273858921162e-07, "loss": 1.2632, "step": 220, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.2391304347826087, "success_rate.epoch.env.math": 0.925, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.5824915824915825, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5389888991267224, "success_rate.epoch.global": 0.550351288056206, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9949178200692042, "tokens_p.mean_below_band": 2.455635694786906e-10, "tokens_p.mean_in_band": 0.505401611328125, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0032679738562091504, "tokens_rate.in_band": 0.05228758169934641 }, { "epoch": 0.03740026595744681, "grad_norm": 260.928410841472, "learning_rate": 1.858921161825726e-07, "loss": 1.2045, "step": 225, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5454545454545454, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.2708333333333333, "success_rate.epoch.env.math": 0.9285714285714286, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.5761589403973509, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5472551469581075, "success_rate.epoch.global": 0.551487414187643, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.2, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9945282101167315, "tokens_p.mean_in_band": 0.5966796875, "tokens_rate.above_band": 0.9413919413919414, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05860805860805861 }, { "epoch": 0.0382313829787234, "grad_norm": 275.0008213682556, "learning_rate": 1.900414937759336e-07, "loss": 1.1255, "step": 230, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5454545454545454, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.2653061224489796, "success_rate.epoch.env.math": 0.9302325581395349, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.5732899022801303, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5542768081243968, "success_rate.epoch.global": 0.5515695067264574, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 1.0001378307939053, "tokens_p.mean_in_band": 0.6113489029255319, "tokens_rate.above_band": 0.9815033451397088, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018496654860291225 }, { "epoch": 0.0390625, "grad_norm": 568.8843764892392, "learning_rate": 1.941908713692946e-07, "loss": 1.242, "step": 235, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.5454545454545454, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.2549019607843137, "success_rate.epoch.env.math": 0.9130434782608695, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.572347266881029, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5511061551307931, "success_rate.epoch.global": 0.5494505494505495, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.38888888888888884, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9971608231707317, "tokens_p.mean_in_band": 0.5733072916666667, "tokens_rate.above_band": 0.956268221574344, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043731778425655975 }, { "epoch": 0.0398936170212766, "grad_norm": 344.86458523370806, "learning_rate": 1.983402489626556e-07, "loss": 1.4863, "step": 240, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.46153846153846156, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.2549019607843137, "success_rate.epoch.env.math": 0.9148936170212766, "success_rate.epoch.env.sat": 0.05555555555555555, "success_rate.epoch.env.science": 0.5759493670886076, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5420248419303145, "success_rate.epoch.global": 0.5495689655172413, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9974385245901639, "tokens_p.mean_in_band": 0.5966145833333333, "tokens_rate.above_band": 0.9312977099236641, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06870229007633588 }, { "epoch": 0.04072473404255319, "grad_norm": 185.59145960225354, "learning_rate": 1.9999999361088134e-07, "loss": 1.4664, "step": 245, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.24528301886792453, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.05263157894736842, "success_rate.epoch.env.science": 0.5736677115987461, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5350598170411204, "success_rate.epoch.global": 0.5443037974683544, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.2, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9974609375, "tokens_p.mean_in_band": 0.5228515625, "tokens_rate.above_band": 0.9411764705882353, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058823529411764705 }, { "epoch": 0.041555851063829786, "grad_norm": 455.47015837406633, "learning_rate": 1.999999545662822e-07, "loss": 1.351, "step": 250, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.23636363636363636, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.047619047619047616, "success_rate.epoch.env.science": 0.5679012345679012, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5296965037705779, "success_rate.epoch.global": 0.5351239669421488, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.2, "success_rate.window.env_macro_mean": 0.05, "success_rate.window.global": 0.1, "tokens_p.mean_above_band": 0.9975178207739308, "tokens_p.mean_below_band": 6.693881005048752e-10, "tokens_p.mean_in_band": 0.5587733477011494, "tokens_rate.above_band": 0.9436258808456118, "tokens_rate.below_band": 0.0006406149903907751, "tokens_rate.in_band": 0.05573350416399744 }, { "epoch": 0.042386968085106384, "grad_norm": 461.8860860264364, "learning_rate": 1.9999988002666344e-07, "loss": 1.2482, "step": 255, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.25, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.047619047619047616, "success_rate.epoch.env.science": 0.5709969788519638, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5287778490950696, "success_rate.epoch.global": 0.537525354969574, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.995, "tokens_p.mean_in_band": 0.6566540948275862, "tokens_rate.above_band": 0.9476534296028881, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.052346570397111915 }, { "epoch": 0.04321808510638298, "grad_norm": 277.7702413417161, "learning_rate": 1.999997699921574e-07, "loss": 1.2997, "step": 260, "success_rate.epoch.env.abd": 0.0, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.2631578947368421, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.047619047619047616, "success_rate.epoch.env.science": 0.5722713864306784, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.534466405561504, "success_rate.epoch.global": 0.5407554671968191, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9955787781350482, "tokens_p.mean_below_band": 2.7284841053187847e-12, "tokens_p.mean_in_band": 0.5538793103448276, "tokens_rate.above_band": 0.9120234604105572, "tokens_rate.below_band": 0.002932551319648094, "tokens_rate.in_band": 0.08504398826979472 }, { "epoch": 0.04404920212765957, "grad_norm": 455.8428416990228, "learning_rate": 1.9999962446295936e-07, "loss": 1.5878, "step": 265, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.27586206896551724, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.045454545454545456, "success_rate.epoch.env.science": 0.5780346820809249, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.558500068640884, "success_rate.epoch.global": 0.5458089668615984, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9926075268817204, "tokens_p.mean_in_band": 0.7098214285714286, "tokens_rate.above_band": 0.850609756097561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14939024390243902 }, { "epoch": 0.04488031914893617, "grad_norm": 357.26486693076953, "learning_rate": 1.9999944343932754e-07, "loss": 1.5464, "step": 270, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.29508196721311475, "success_rate.epoch.env.math": 0.9038461538461539, "success_rate.epoch.env.sat": 0.045454545454545456, "success_rate.epoch.env.science": 0.58, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5057257764200869, "success_rate.epoch.global": 0.5487571701720841, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6041666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9938058035714286, "tokens_p.mean_in_band": 0.6649682971014492, "tokens_rate.above_band": 0.890302066772655, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10969793322734499 }, { "epoch": 0.04571143617021277, "grad_norm": 549.8655835803091, "learning_rate": 1.999992269215833e-07, "loss": 1.4171, "step": 275, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.3064516129032258, "success_rate.epoch.env.math": 0.9056603773584906, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.5786516853932584, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5112495922505863, "success_rate.epoch.global": 0.549718574108818, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9925223214285714, "tokens_p.mean_below_band": 7.566995918750763e-10, "tokens_p.mean_in_band": 0.6725694444444444, "tokens_rate.above_band": 0.8588957055214724, "tokens_rate.below_band": 0.003067484662576687, "tokens_rate.in_band": 0.13803680981595093 }, { "epoch": 0.04654255319148936, "grad_norm": 395.91521245328533, "learning_rate": 1.9999897491011083e-07, "loss": 1.2034, "step": 280, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.31746031746031744, "success_rate.epoch.env.math": 0.9056603773584906, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.5741758241758241, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5070914588487655, "success_rate.epoch.global": 0.5469613259668509, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.375, "success_rate.window.env_macro_mean": 0.4583333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.992175702204772, "tokens_p.mean_below_band": 4.842877388000488e-07, "tokens_p.mean_in_band": 0.5022039592352092, "tokens_rate.above_band": 0.826510234648028, "tokens_rate.below_band": 0.0004992511233150275, "tokens_rate.in_band": 0.17299051422865702 }, { "epoch": 0.04737367021276596, "grad_norm": 223.10287870494537, "learning_rate": 1.9999868740535746e-07, "loss": 1.2274, "step": 285, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.3230769230769231, "success_rate.epoch.env.math": 0.9074074074074074, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.5760869565217391, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5009770565809214, "success_rate.epoch.global": 0.546112115732369, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9928622412562456, "tokens_p.mean_in_band": 0.6591036676646707, "tokens_rate.above_band": 0.8934948979591837, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10650510204081633 }, { "epoch": 0.048204787234042555, "grad_norm": 191.06301969150624, "learning_rate": 1.9999836440783338e-07, "loss": 1.3325, "step": 290, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.3283582089552239, "success_rate.epoch.env.math": 0.9074074074074074, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.574468085106383, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5013839915212485, "success_rate.epoch.global": 0.5452930728241563, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9963715529753265, "tokens_p.mean_in_band": 0.5697443181818181, "tokens_rate.above_band": 0.9260752688172043, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0739247311827957 }, { "epoch": 0.049035904255319146, "grad_norm": 375.5001620168459, "learning_rate": 1.9999800591811186e-07, "loss": 1.1717, "step": 295, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.3382352941176471, "success_rate.epoch.env.math": 0.9090909090909091, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.577023498694517, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5026105556720414, "success_rate.epoch.global": 0.5479930191972077, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9929872047244095, "tokens_p.mean_in_band": 0.6797022964015151, "tokens_rate.above_band": 0.8850174216027874, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11498257839721254 }, { "epoch": 0.049867021276595744, "grad_norm": 393.881222663392, "learning_rate": 1.999976119368291e-07, "loss": 1.2225, "step": 300, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 0.8793103448275862, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.5732647814910026, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4983393065330246, "success_rate.epoch.global": 0.5437392795883362, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.3333333333333333, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.2222222222222222, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.990234375, "tokens_p.mean_in_band": 0.5426897321428571, "tokens_rate.above_band": 0.8372093023255814, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16279069767441862 }, { "epoch": 0.05069813829787234, "grad_norm": 207.28229588926519, "learning_rate": 1.9999718246468435e-07, "loss": 0.9999, "step": 305, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.32857142857142857, "success_rate.epoch.env.math": 0.8793103448275862, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.5778894472361809, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4983240577533883, "success_rate.epoch.global": 0.5463743676222597, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9986795774647887, "tokens_p.mean_in_band": 0.5867745535714286, "tokens_rate.above_band": 0.9287790697674418, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07122093023255814 }, { "epoch": 0.05152925531914894, "grad_norm": 236.4680773613799, "learning_rate": 1.9999671750243982e-07, "loss": 1.0784, "step": 310, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.32857142857142857, "success_rate.epoch.env.math": 0.8833333333333333, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.583743842364532, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4994215448238436, "success_rate.epoch.global": 0.5522388059701493, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9877232142857143, "tokens_p.mean_in_band": 0.7590169270833333, "tokens_rate.above_band": 0.8818897637795275, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11811023622047244 }, { "epoch": 0.05236037234042553, "grad_norm": 320.03233321273206, "learning_rate": 1.9999621705092072e-07, "loss": 1.4287, "step": 315, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.32432432432432434, "success_rate.epoch.env.math": 0.8852459016393442, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.5878048780487805, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4996133781286387, "success_rate.epoch.global": 0.553921568627451, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.25, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9957317073170732, "tokens_p.mean_in_band": 0.6130514705882353, "tokens_rate.above_band": 0.9731012658227848, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02689873417721519 }, { "epoch": 0.05319148936170213, "grad_norm": 367.52734006325716, "learning_rate": 1.999956811110152e-07, "loss": 1.6472, "step": 320, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.3116883116883117, "success_rate.epoch.env.math": 0.8852459016393442, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.5875299760191847, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4981788320546823, "success_rate.epoch.global": 0.5514469453376206, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.2857142857142857, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9966280798640612, "tokens_p.mean_in_band": 0.529296875, "tokens_rate.above_band": 0.9453815261044177, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05461847389558233 }, { "epoch": 0.054022606382978726, "grad_norm": 421.4314992788045, "learning_rate": 1.9999510968367442e-07, "loss": 1.0747, "step": 325, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.3037974683544304, "success_rate.epoch.env.math": 0.873015873015873, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.5843230403800475, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.446028168089565, "success_rate.epoch.global": 0.5467511885895404, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.1875, "success_rate.window.global": 0.2, "tokens_p.mean_above_band": 1.000200491873396, "tokens_p.mean_below_band": 4.922640073345974e-10, "tokens_p.mean_in_band": 0.5323585304054054, "tokens_rate.above_band": 0.9685169842584921, "tokens_rate.below_band": 0.0008285004142502071, "tokens_rate.in_band": 0.030654515327257662 }, { "epoch": 0.05485372340425532, "grad_norm": 296.71467421076136, "learning_rate": 1.999945027699125e-07, "loss": 1.0972, "step": 330, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.30120481927710846, "success_rate.epoch.env.math": 0.8769230769230769, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.5811764705882353, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.445844966593372, "success_rate.epoch.global": 0.5444617784711389, "success_rate.window.env.logic": 0.25, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9984885126964933, "tokens_p.mean_in_band": 0.6131036931818182, "tokens_rate.above_band": 0.9494833524684271, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0505166475315729 }, { "epoch": 0.055684840425531915, "grad_norm": 215.43312239144745, "learning_rate": 1.9999386037080662e-07, "loss": 1.2842, "step": 335, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.30120481927710846, "success_rate.epoch.env.math": 0.8676470588235294, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.5787037037037037, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.444670088094964, "success_rate.epoch.global": 0.543778801843318, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.5476190476190476, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.986328125, "tokens_p.mean_in_band": 0.6127232142857143, "tokens_rate.above_band": 0.7619047619047619, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.23809523809523808 }, { "epoch": 0.05651595744680851, "grad_norm": 199.4685504028312, "learning_rate": 1.9999318248749688e-07, "loss": 1.1739, "step": 340, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.30120481927710846, "success_rate.epoch.env.math": 0.8714285714285714, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.5740318906605922, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4479998614699605, "success_rate.epoch.global": 0.5431164901664145, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.2857142857142857, "success_rate.window.env_macro_mean": 0.7619047619047619, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9866573033707865, "tokens_p.mean_below_band": 1.9190338207408786e-10, "tokens_p.mean_in_band": 0.5721153846153846, "tokens_rate.above_band": 0.8682926829268293, "tokens_rate.below_band": 0.004878048780487805, "tokens_rate.in_band": 0.12682926829268293 }, { "epoch": 0.0573470744680851, "grad_norm": 356.2683257848366, "learning_rate": 1.9999246912118632e-07, "loss": 1.2012, "step": 345, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5625, "success_rate.epoch.env.logic": 0.30120481927710846, "success_rate.epoch.env.math": 0.8611111111111112, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.5717488789237668, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.44965648093119864, "success_rate.epoch.global": 0.5424739195230999, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.6428571428571429, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.98296875, "tokens_p.mean_below_band": 2.6756374893466273e-14, "tokens_p.mean_in_band": 0.5924161585365854, "tokens_rate.above_band": 0.9049773755656109, "tokens_rate.below_band": 0.0022624434389140274, "tokens_rate.in_band": 0.09276018099547512 }, { "epoch": 0.0581781914893617, "grad_norm": 175.17612522561566, "learning_rate": 1.99991720273141e-07, "loss": 1.2566, "step": 350, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5882352941176471, "success_rate.epoch.env.logic": 0.29069767441860467, "success_rate.epoch.env.math": 0.8611111111111112, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.5698447893569845, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.45098888690043476, "success_rate.epoch.global": 0.5397058823529411, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.35, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9938968428781204, "tokens_p.mean_in_band": 0.5026939655172413, "tokens_rate.above_band": 0.9399585921325052, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.060041407867494824 }, { "epoch": 0.0590093085106383, "grad_norm": 461.050063676608, "learning_rate": 1.9999093594468995e-07, "loss": 1.2682, "step": 355, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3684210526315789, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5882352941176471, "success_rate.epoch.env.logic": 0.2840909090909091, "success_rate.epoch.env.math": 0.8611111111111112, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.5689277899343544, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4477929014028458, "success_rate.epoch.global": 0.5362318840579711, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.125, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9958196271929824, "tokens_p.mean_in_band": 0.538671875, "tokens_rate.above_band": 0.9334698055271239, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06653019447287616 }, { "epoch": 0.0598404255319149, "grad_norm": 297.2931214793317, "learning_rate": 1.9999011613722514e-07, "loss": 1.0666, "step": 360, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3684210526315789, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5882352941176471, "success_rate.epoch.env.logic": 0.2808988764044944, "success_rate.epoch.env.math": 0.8611111111111112, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.572961373390558, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4478770564798246, "success_rate.epoch.global": 0.5385714285714286, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9959612277867528, "tokens_p.mean_in_band": 0.5782596982758621, "tokens_rate.above_band": 0.9552469135802469, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044753086419753084 }, { "epoch": 0.06067154255319149, "grad_norm": 294.8580182370888, "learning_rate": 1.9998926085220154e-07, "loss": 1.2332, "step": 365, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.3684210526315789, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5882352941176471, "success_rate.epoch.env.logic": 0.2808988764044944, "success_rate.epoch.env.math": 0.8648648648648649, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.5759493670886076, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.44855123122500495, "success_rate.epoch.global": 0.5422535211267606, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9869237588652482, "tokens_p.mean_below_band": 4.3655745685100555e-09, "tokens_p.mean_in_band": 0.735, "tokens_rate.above_band": 0.844311377245509, "tokens_rate.below_band": 0.005988023952095809, "tokens_rate.in_band": 0.1497005988023952 }, { "epoch": 0.061502659574468085, "grad_norm": 172.51017172216643, "learning_rate": 1.999883700911371e-07, "loss": 1.3382, "step": 370, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.38095238095238093, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5555555555555556, "success_rate.epoch.env.logic": 0.28888888888888886, "success_rate.epoch.env.math": 0.8666666666666667, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.5744234800838575, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4469935248009419, "success_rate.epoch.global": 0.541029207232267, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.4583333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9888229927007299, "tokens_p.mean_in_band": 0.4854712701612903, "tokens_rate.above_band": 0.6734550561797753, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.3265449438202247 }, { "epoch": 0.06233377659574468, "grad_norm": 604.1278280415459, "learning_rate": 1.9998744385561267e-07, "loss": 1.5613, "step": 375, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.38095238095238093, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5555555555555556, "success_rate.epoch.env.logic": 0.2967032967032967, "success_rate.epoch.env.math": 0.868421052631579, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.5743801652892562, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4479460726994137, "success_rate.epoch.global": 0.5425824175824175, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9900873655913979, "tokens_p.mean_in_band": 0.6736727627840909, "tokens_rate.above_band": 0.8942307692307693, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10576923076923077 }, { "epoch": 0.06316489361702128, "grad_norm": 229.6209398930736, "learning_rate": 1.9998648214727209e-07, "loss": 1.0662, "step": 380, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5555555555555556, "success_rate.epoch.env.logic": 0.29347826086956524, "success_rate.epoch.env.math": 0.8734177215189873, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.5766871165644172, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.45116778394615037, "success_rate.epoch.global": 0.5460704607046071, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0001265182186234, "tokens_p.mean_in_band": 0.5583298141891891, "tokens_rate.above_band": 0.9639024390243902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03609756097560975 }, { "epoch": 0.06399601063829788, "grad_norm": 265.1657256856042, "learning_rate": 1.9998548496782216e-07, "loss": 1.1013, "step": 385, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5555555555555556, "success_rate.epoch.env.logic": 0.2872340425531915, "success_rate.epoch.env.math": 0.875, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.5757575757575758, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4506086358819301, "success_rate.epoch.global": 0.5448460508701473, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.5238095238095238, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.998913421418637, "tokens_p.mean_in_band": 0.5935763888888889, "tokens_rate.above_band": 0.9410994764397905, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058900523560209424 }, { "epoch": 0.06482712765957446, "grad_norm": 172.95453270951595, "learning_rate": 1.9998445231903263e-07, "loss": 1.4177, "step": 390, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5789473684210527, "success_rate.epoch.env.logic": 0.28421052631578947, "success_rate.epoch.env.math": 0.875, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.5756972111553785, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.452294601498313, "success_rate.epoch.global": 0.5442536327608983, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.39285714285714285, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9935334158415842, "tokens_p.mean_below_band": 3.342393029015511e-11, "tokens_p.mean_in_band": 0.5884811046511628, "tokens_rate.above_band": 0.9582542694497154, "tokens_rate.below_band": 0.0009487666034155598, "tokens_rate.in_band": 0.04079696394686907 }, { "epoch": 0.06565824468085106, "grad_norm": 357.3514810176116, "learning_rate": 1.999833842027362e-07, "loss": 1.0726, "step": 395, "success_rate.epoch.env.abd": 0.2, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5789473684210527, "success_rate.epoch.env.logic": 0.27835051546391754, "success_rate.epoch.env.math": 0.875, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.5725490196078431, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4513937812583723, "success_rate.epoch.global": 0.5410691003911343, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.375, "success_rate.window.env_macro_mean": 0.1875, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9976134726224783, "tokens_p.mean_in_band": 0.5455923507462687, "tokens_rate.above_band": 0.911957950065703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08804204993429698 }, { "epoch": 0.06648936170212766, "grad_norm": 278.3049999132479, "learning_rate": 1.9998228062082854e-07, "loss": 1.2295, "step": 400, "success_rate.epoch.env.abd": 0.16666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.55, "success_rate.epoch.env.logic": 0.27835051546391754, "success_rate.epoch.env.math": 0.875, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.5689320388349515, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4473040130056445, "success_rate.epoch.global": 0.5373711340206185, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.sat": 0.3333333333333333, "success_rate.window.env.science": 0.2, "success_rate.window.env_macro_mean": 0.13333333333333333, "success_rate.window.global": 0.2, "tokens_p.mean_above_band": 0.993797195253506, "tokens_p.mean_in_band": 0.5444269825918762, "tokens_rate.above_band": 0.8776331360946745, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12236686390532545 }, { "epoch": 0.06732047872340426, "grad_norm": 327.2404418386426, "learning_rate": 1.999811415752682e-07, "loss": 1.2184, "step": 405, "success_rate.epoch.env.abd": 0.2857142857142857, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.55, "success_rate.epoch.env.logic": 0.29292929292929293, "success_rate.epoch.env.math": 0.875, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.5747126436781609, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4612447131412648, "success_rate.epoch.global": 0.5432569974554707, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9920774647887324, "tokens_p.mean_in_band": 0.7473958333333334, "tokens_rate.above_band": 0.9403973509933775, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.059602649006622516 }, { "epoch": 0.06815159574468085, "grad_norm": 649.8902582794741, "learning_rate": 1.999799670680767e-07, "loss": 1.2794, "step": 410, "success_rate.epoch.env.abd": 0.25, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.3, "success_rate.epoch.env.math": 0.8765432098765432, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.5768500948766604, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.46051249064848054, "success_rate.epoch.global": 0.5452261306532663, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9873650637880275, "tokens_p.mean_in_band": 0.24865540679039952, "tokens_rate.above_band": 0.2330208095129202, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.7669791904870799 }, { "epoch": 0.06898271276595745, "grad_norm": 441.26911688802744, "learning_rate": 1.9997875710133858e-07, "loss": 1.1711, "step": 415, "success_rate.epoch.env.abd": 0.25, "success_rate.epoch.env.agentgym:alfworld": 0.43478260869565216, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.3, "success_rate.epoch.env.math": 0.8690476190476191, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.5778611632270169, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.46243320836109814, "success_rate.epoch.global": 0.5471464019851117, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973591549295775, "tokens_p.mean_below_band": 1.4342367882225204e-07, "tokens_p.mean_in_band": 0.6630345394736842, "tokens_rate.above_band": 0.9311475409836065, "tokens_rate.below_band": 0.006557377049180328, "tokens_rate.in_band": 0.06229508196721312 }, { "epoch": 0.06981382978723404, "grad_norm": 496.73959779342533, "learning_rate": 1.9997751167720117e-07, "loss": 1.3355, "step": 420, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.43478260869565216, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.3, "success_rate.epoch.env.math": 0.8705882352941177, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.5767097966728281, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.45969435555255134, "success_rate.epoch.global": 0.5465686274509803, "success_rate.window.env.abd": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9919117647058824, "tokens_p.mean_below_band": 3.213062882423401e-08, "tokens_p.mean_in_band": 0.6492745535714286, "tokens_rate.above_band": 0.918918918918919, "tokens_rate.below_band": 0.005405405405405406, "tokens_rate.in_band": 0.07567567567567568 }, { "epoch": 0.07064494680851063, "grad_norm": 628.2080255418654, "learning_rate": 1.9997623079787476e-07, "loss": 1.1262, "step": 425, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.3, "success_rate.epoch.env.math": 0.8620689655172413, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.5766423357664233, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4570240882813247, "success_rate.epoch.global": 0.5460048426150121, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9963908450704225, "tokens_p.mean_in_band": 0.6424479166666667, "tokens_rate.above_band": 0.922077922077922, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07792207792207792 }, { "epoch": 0.07147606382978723, "grad_norm": 231.12520962533006, "learning_rate": 1.9997491446563267e-07, "loss": 1.1231, "step": 430, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.44, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.3, "success_rate.epoch.env.math": 0.8636363636363636, "success_rate.epoch.env.sat": 0.14705882352941177, "success_rate.epoch.env.science": 0.5801801801801801, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.462452616099675, "success_rate.epoch.global": 0.5502392344497608, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964330024813896, "tokens_p.mean_in_band": 0.63916015625, "tokens_rate.above_band": 0.9710843373493976, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02891566265060241 }, { "epoch": 0.07230718085106383, "grad_norm": 297.4473271435285, "learning_rate": 1.99973562682811e-07, "loss": 0.9839, "step": 435, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.3137254901960784, "success_rate.epoch.env.math": 0.8539325842696629, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.5796064400715564, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.4606849374122158, "success_rate.epoch.global": 0.5491124260355029, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.3, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9964139344262295, "tokens_p.mean_below_band": 4.192202140984591e-13, "tokens_p.mean_in_band": 0.5060221354166666, "tokens_rate.above_band": 0.9512670565302144, "tokens_rate.below_band": 0.001949317738791423, "tokens_rate.in_band": 0.04678362573099415 }, { "epoch": 0.07313829787234043, "grad_norm": 175.33723100290754, "learning_rate": 1.9997217545180892e-07, "loss": 1.2852, "step": 440, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.32038834951456313, "success_rate.epoch.env.math": 0.8571428571428571, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.5780141843971631, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5104663864217674, "success_rate.epoch.global": 0.550351288056206, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9927273882113821, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.9389312977099237, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061068702290076333 }, { "epoch": 0.07396941489361702, "grad_norm": 406.97798204714803, "learning_rate": 1.9997075277508834e-07, "loss": 1.3382, "step": 445, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5909090909090909, "success_rate.epoch.env.logic": 0.32038834951456313, "success_rate.epoch.env.math": 0.8571428571428571, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.5741710296684118, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5118879650355646, "success_rate.epoch.global": 0.5486111111111112, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9934440559440559, "tokens_p.mean_below_band": 3.774403012357652e-11, "tokens_p.mean_in_band": 0.5091959635416666, "tokens_rate.above_band": 0.9196141479099679, "tokens_rate.below_band": 0.003215434083601286, "tokens_rate.in_band": 0.07717041800643087 }, { "epoch": 0.07480053191489362, "grad_norm": 334.93766256356366, "learning_rate": 1.999692946551742e-07, "loss": 1.4068, "step": 450, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5909090909090909, "success_rate.epoch.env.logic": 0.3238095238095238, "success_rate.epoch.env.math": 0.8586956521739131, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.5729166666666666, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5122261110650439, "success_rate.epoch.global": 0.5482758620689655, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9946907600596125, "tokens_p.mean_below_band": 4.6629367034256575e-15, "tokens_p.mean_in_band": 0.6300698138297872, "tokens_rate.above_band": 0.933240611961057, "tokens_rate.below_band": 0.0013908205841446453, "tokens_rate.in_band": 0.06536856745479833 }, { "epoch": 0.0756316489361702, "grad_norm": 683.6804685688163, "learning_rate": 1.9996780109465432e-07, "loss": 1.186, "step": 455, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5909090909090909, "success_rate.epoch.env.logic": 0.3238095238095238, "success_rate.epoch.env.math": 0.8602150537634409, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.5711835334476844, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5118459305561612, "success_rate.epoch.global": 0.5472127417519909, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.375, "success_rate.window.env_macro_mean": 0.4583333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9900914634146342, "tokens_p.mean_in_band": 0.5889811197916667, "tokens_rate.above_band": 0.7321428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.26785714285714285 }, { "epoch": 0.0764627659574468, "grad_norm": 155.9163180652835, "learning_rate": 1.9996627209617939e-07, "loss": 1.1827, "step": 460, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5909090909090909, "success_rate.epoch.env.logic": 0.32710280373831774, "success_rate.epoch.env.math": 0.8617021276595744, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.5738539898132428, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5121820265958642, "success_rate.epoch.global": 0.5489313835770528, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977647867950481, "tokens_p.mean_in_band": 0.6150323275862069, "tokens_rate.above_band": 0.9261146496815287, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07388535031847134 }, { "epoch": 0.0772938829787234, "grad_norm": 263.62560819462385, "learning_rate": 1.9996470766246299e-07, "loss": 1.3302, "step": 465, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6086956521739131, "success_rate.epoch.env.logic": 0.32727272727272727, "success_rate.epoch.env.math": 0.8617021276595744, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.5733558178752108, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5142686464463828, "success_rate.epoch.global": 0.5483870967741935, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9964634773662552, "tokens_p.mean_in_band": 0.6992017663043478, "tokens_rate.above_band": 0.9441476444876153, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055852355512384655 }, { "epoch": 0.078125, "grad_norm": 425.7380637797864, "learning_rate": 1.999631077962816e-07, "loss": 1.1097, "step": 470, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.41379310344827586, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6086956521739131, "success_rate.epoch.env.logic": 0.32727272727272727, "success_rate.epoch.env.math": 0.8556701030927835, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.5742904841402338, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5124617661350265, "success_rate.epoch.global": 0.5489548954895489, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9961666666666666, "tokens_p.mean_in_band": 0.60198974609375, "tokens_rate.above_band": 0.959079283887468, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04092071611253197 }, { "epoch": 0.0789561170212766, "grad_norm": 257.61330518805875, "learning_rate": 1.9996147250047465e-07, "loss": 1.2895, "step": 475, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.41379310344827586, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6, "success_rate.epoch.env.logic": 0.33035714285714285, "success_rate.epoch.env.math": 0.8571428571428571, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.5745033112582781, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5121048883694465, "success_rate.epoch.global": 0.5495103373231773, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.65, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9957741171403962, "tokens_p.mean_below_band": 8.866190910339355e-07, "tokens_p.mean_in_band": 0.49592791863207547, "tokens_rate.above_band": 0.9561457689932057, "tokens_rate.below_band": 0.00020588840848260242, "tokens_rate.in_band": 0.04364834259831171 }, { "epoch": 0.0797872340425532, "grad_norm": 646.3933282739685, "learning_rate": 1.9995980177794433e-07, "loss": 1.5417, "step": 480, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6, "success_rate.epoch.env.logic": 0.33035714285714285, "success_rate.epoch.env.math": 0.8484848484848485, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.5721311475409836, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5249997420824545, "success_rate.epoch.global": 0.5474137931034483, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9934895833333334, "tokens_p.mean_in_band": 0.5028545673076923, "tokens_rate.above_band": 0.9509433962264151, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04905660377358491 }, { "epoch": 0.0806183510638298, "grad_norm": 486.91008628123825, "learning_rate": 1.9995809563165579e-07, "loss": 1.1777, "step": 485, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6, "success_rate.epoch.env.logic": 0.33035714285714285, "success_rate.epoch.env.math": 0.8431372549019608, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.5762987012987013, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5245691759377377, "success_rate.epoch.global": 0.5501066098081023, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9915149006622517, "tokens_p.mean_in_band": 0.5921630859375, "tokens_rate.above_band": 0.825136612021858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17486338797814208 }, { "epoch": 0.08144946808510638, "grad_norm": 324.6965320642356, "learning_rate": 1.99956354064637e-07, "loss": 1.0392, "step": 490, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.33035714285714285, "success_rate.epoch.env.math": 0.8490566037735849, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.5751211631663974, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5263988510399137, "success_rate.epoch.global": 0.5517970401691332, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.2, "success_rate.window.env_macro_mean": 0.7333333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9870379876796714, "tokens_p.mean_in_band": 0.7021033653846154, "tokens_rate.above_band": 0.8822463768115942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11775362318840579 }, { "epoch": 0.08228058510638298, "grad_norm": 479.0166035790374, "learning_rate": 1.9995457707997888e-07, "loss": 1.4344, "step": 495, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.3870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5925925925925926, "success_rate.epoch.env.logic": 0.3274336283185841, "success_rate.epoch.env.math": 0.8504672897196262, "success_rate.epoch.env.sat": 0.1282051282051282, "success_rate.epoch.env.science": 0.5737179487179487, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5225820227851198, "success_rate.epoch.global": 0.549163179916318, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.2333333333333333, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9933845766129032, "tokens_p.mean_below_band": 7.82310962677002e-07, "tokens_p.mean_in_band": 0.35601395730706076, "tokens_rate.above_band": 0.7092469018112488, "tokens_rate.below_band": 0.00047664442326024784, "tokens_rate.in_band": 0.29027645376549094 }, { "epoch": 0.08311170212765957, "grad_norm": 382.2248088044927, "learning_rate": 1.9995276468083503e-07, "loss": 1.0168, "step": 500, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.3870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.32456140350877194, "success_rate.epoch.env.math": 0.8518518518518519, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.5777777777777777, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5206004788772192, "success_rate.epoch.global": 0.5507246376811594, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9930854386451972, "tokens_p.mean_below_band": 7.82310962677002e-07, "tokens_p.mean_in_band": 0.5431722005208334, "tokens_rate.above_band": 0.8821944648542738, "tokens_rate.below_band": 0.0002449179524859172, "tokens_rate.in_band": 0.11756061719324026 }, { "epoch": 0.08394281914893617, "grad_norm": 354.1107390334492, "learning_rate": 1.9995091687042212e-07, "loss": 1.2537, "step": 505, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.3870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.3162393162393162, "success_rate.epoch.env.math": 0.8518518518518519, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.5770440251572327, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5197772207054009, "success_rate.epoch.global": 0.5487179487179488, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.2857142857142857, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9984090909090909, "tokens_p.mean_below_band": 7.048583938740194e-11, "tokens_p.mean_in_band": 0.6290646114864865, "tokens_rate.above_band": 0.9166666666666666, "tokens_rate.below_band": 0.0011111111111111111, "tokens_rate.in_band": 0.08222222222222222 }, { "epoch": 0.08477393617021277, "grad_norm": 230.07439759570912, "learning_rate": 1.999490336520195e-07, "loss": 0.8621, "step": 510, "success_rate.epoch.env.abd": 0.2222222222222222, "success_rate.epoch.env.agentgym:alfworld": 0.3870967741935484, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.3135593220338983, "success_rate.epoch.env.math": 0.8518518518518519, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.577639751552795, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5195877418135958, "success_rate.epoch.global": 0.5487804878048781, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.20833333333333334, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9926061678463094, "tokens_p.mean_below_band": 7.338821887969971e-07, "tokens_p.mean_in_band": 0.5253504372427984, "tokens_rate.above_band": 0.8903893765473779, "tokens_rate.below_band": 0.00022507314877335134, "tokens_rate.in_band": 0.10938555030384875 }, { "epoch": 0.08560505319148937, "grad_norm": 427.0258546865168, "learning_rate": 1.9994711502896943e-07, "loss": 1.1471, "step": 515, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.40625, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.31666666666666665, "success_rate.epoch.env.math": 0.8518518518518519, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.5784615384615385, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5287568450068449, "success_rate.epoch.global": 0.5503018108651911, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9949675324675324, "tokens_p.mean_in_band": 0.6814135174418605, "tokens_rate.above_band": 0.8995327102803738, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10046728971962617 }, { "epoch": 0.08643617021276596, "grad_norm": 803.1597369271358, "learning_rate": 1.9994516100467703e-07, "loss": 1.1766, "step": 520, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.31666666666666665, "success_rate.epoch.env.math": 0.8532110091743119, "success_rate.epoch.env.sat": 0.12195121951219512, "success_rate.epoch.env.science": 0.5792682926829268, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5303122591248876, "success_rate.epoch.global": 0.551345962113659, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9968072755417957, "tokens_p.mean_in_band": 0.526171875, "tokens_rate.above_band": 0.8433420365535248, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1566579634464752 }, { "epoch": 0.08726728723404255, "grad_norm": 174.46731622162167, "learning_rate": 1.999431715826102e-07, "loss": 1.1731, "step": 525, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.319672131147541, "success_rate.epoch.env.math": 0.8532110091743119, "success_rate.epoch.env.sat": 0.11904761904761904, "success_rate.epoch.env.science": 0.579185520361991, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5303139947335569, "success_rate.epoch.global": 0.5508390918065152, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9907882462686567, "tokens_p.mean_in_band": 0.5906982421875, "tokens_rate.above_band": 0.8933333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10666666666666667 }, { "epoch": 0.08809840425531915, "grad_norm": 283.8067170785908, "learning_rate": 1.9994114676629962e-07, "loss": 1.1053, "step": 530, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.3225806451612903, "success_rate.epoch.env.math": 0.8532110091743119, "success_rate.epoch.env.sat": 0.11904761904761904, "success_rate.epoch.env.science": 0.5799701046337817, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5277240807724597, "success_rate.epoch.global": 0.5503421309872922, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.29166666666666663, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.993408677184466, "tokens_p.mean_below_band": 2.568121999502182e-07, "tokens_p.mean_in_band": 0.5207973073122529, "tokens_rate.above_band": 0.8900410455821992, "tokens_rate.below_band": 0.0006480881399870382, "tokens_rate.in_band": 0.10931086627781378 }, { "epoch": 0.08892952127659574, "grad_norm": 400.74709719437845, "learning_rate": 1.9993908655933894e-07, "loss": 1.1141, "step": 535, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.3225806451612903, "success_rate.epoch.env.math": 0.8545454545454545, "success_rate.epoch.env.sat": 0.11904761904761904, "success_rate.epoch.env.science": 0.5828402366863905, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5281063150837099, "success_rate.epoch.global": 0.5528612997090203, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9890752032520326, "tokens_p.mean_below_band": 6.693881005048752e-10, "tokens_p.mean_in_band": 0.7046130952380952, "tokens_rate.above_band": 0.8482758620689655, "tokens_rate.below_band": 0.006896551724137931, "tokens_rate.in_band": 0.14482758620689656 }, { "epoch": 0.08976063829787234, "grad_norm": 365.3966680271899, "learning_rate": 1.9993699096538443e-07, "loss": 1.1627, "step": 540, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.3888888888888889, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.32, "success_rate.epoch.env.math": 0.8571428571428571, "success_rate.epoch.env.sat": 0.11627906976744186, "success_rate.epoch.env.science": 0.5823529411764706, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5257322328703053, "success_rate.epoch.global": 0.5513928914505284, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.3, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9979963442069741, "tokens_p.mean_in_band": 0.5530711206896551, "tokens_rate.above_band": 0.938753959873284, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061246040126715945 }, { "epoch": 0.09059175531914894, "grad_norm": 329.1499487681222, "learning_rate": 1.9993485998815532e-07, "loss": 1.118, "step": 545, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.3783783783783784, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.31496062992125984, "success_rate.epoch.env.math": 0.8584070796460177, "success_rate.epoch.env.sat": 0.11627906976744186, "success_rate.epoch.env.science": 0.583941605839416, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5321537183166861, "success_rate.epoch.global": 0.5518553758325404, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5599999999999999, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9974304583624912, "tokens_p.mean_in_band": 0.5894681490384616, "tokens_rate.above_band": 0.9648885887913572, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035111411208642807 }, { "epoch": 0.09142287234042554, "grad_norm": 180.2068274951857, "learning_rate": 1.9993269363143354e-07, "loss": 1.07, "step": 550, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.3783783783783784, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.3125, "success_rate.epoch.env.math": 0.8584070796460177, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.5875542691751086, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5317886342037863, "success_rate.epoch.global": 0.5528301886792453, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9967641843971631, "tokens_p.mean_in_band": 0.6424231150793651, "tokens_rate.above_band": 0.91796875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08203125 }, { "epoch": 0.09225398936170212, "grad_norm": 98.47914100347661, "learning_rate": 1.9993049189906385e-07, "loss": 1.077, "step": 555, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.34146341463414637, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.3125, "success_rate.epoch.env.math": 0.8584070796460177, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.5884892086330935, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5282981357289277, "success_rate.epoch.global": 0.5509822263797942, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9981767794632439, "tokens_p.mean_in_band": 0.7005642361111111, "tokens_rate.above_band": 0.950110864745011, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04988913525498891 }, { "epoch": 0.09308510638297872, "grad_norm": 226.43247644121573, "learning_rate": 1.999282547949538e-07, "loss": 1.0796, "step": 560, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.34146341463414637, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.3153846153846154, "success_rate.epoch.env.math": 0.8521739130434782, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.5894134477825465, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.528077743722703, "success_rate.epoch.global": 0.5515320334261838, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9987796373779637, "tokens_p.mean_in_band": 0.5765772405660378, "tokens_rate.above_band": 0.9311688311688312, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06883116883116883 }, { "epoch": 0.09391622340425532, "grad_norm": 365.66409248976464, "learning_rate": 1.9992598232307372e-07, "loss": 1.2875, "step": 565, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.34146341463414637, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.3153846153846154, "success_rate.epoch.env.math": 0.8521739130434782, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.5892351274787535, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5280615327859945, "success_rate.epoch.global": 0.551660516605166, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9895104895104895, "tokens_p.mean_in_band": 0.6145241477272727, "tokens_rate.above_band": 0.7647058823529411, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.23529411764705882 }, { "epoch": 0.09474734042553191, "grad_norm": 435.6573483721362, "learning_rate": 1.9992367448745663e-07, "loss": 1.1268, "step": 570, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.34146341463414637, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.3153846153846154, "success_rate.epoch.env.math": 0.853448275862069, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.5902097902097903, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5282659896541425, "success_rate.epoch.global": 0.553016453382084, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9854484732824428, "tokens_p.mean_in_band": 0.5512152777777778, "tokens_rate.above_band": 0.7443181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2556818181818182 }, { "epoch": 0.09557845744680851, "grad_norm": 553.93686230331, "learning_rate": 1.999213312921984e-07, "loss": 0.9857, "step": 575, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.34146341463414637, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.31297709923664124, "success_rate.epoch.env.math": 0.853448275862069, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.5922330097087378, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5282310535951402, "success_rate.epoch.global": 0.5540417801998183, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.996368501529052, "tokens_p.mean_in_band": 0.6546585648148148, "tokens_rate.above_band": 0.960352422907489, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039647577092511016 }, { "epoch": 0.09640957446808511, "grad_norm": 277.3004653277527, "learning_rate": 1.999189527414576e-07, "loss": 1.0519, "step": 580, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.34146341463414637, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.31343283582089554, "success_rate.epoch.env.math": 0.8547008547008547, "success_rate.epoch.env.sat": 0.10638297872340426, "success_rate.epoch.env.science": 0.5939226519337016, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5266578244678487, "success_rate.epoch.global": 0.5540540540540541, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.25, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9953516902944384, "tokens_p.mean_in_band": 0.5635202891791045, "tokens_rate.above_band": 0.8953329427846124, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10466705721538762 }, { "epoch": 0.09724069148936171, "grad_norm": 260.95538144569673, "learning_rate": 1.9991653883945568e-07, "loss": 0.9493, "step": 585, "success_rate.epoch.env.abd": 0.3, "success_rate.epoch.env.agentgym:alfworld": 0.34146341463414637, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.3161764705882353, "success_rate.epoch.env.math": 0.85, "success_rate.epoch.env.sat": 0.10416666666666667, "success_rate.epoch.env.science": 0.5961538461538461, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5264812483069298, "success_rate.epoch.global": 0.5553571428571429, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9984065155807366, "tokens_p.mean_below_band": 1.6555645743210334e-12, "tokens_p.mean_in_band": 0.5758539244186046, "tokens_rate.above_band": 0.9413333333333334, "tokens_rate.below_band": 0.0013333333333333333, "tokens_rate.in_band": 0.05733333333333333 }, { "epoch": 0.09807180851063829, "grad_norm": 371.6161150140347, "learning_rate": 1.999140895904766e-07, "loss": 1.0071, "step": 590, "success_rate.epoch.env.abd": 0.36363636363636365, "success_rate.epoch.env.agentgym:alfworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.3161764705882353, "success_rate.epoch.env.math": 0.8512396694214877, "success_rate.epoch.env.sat": 0.10416666666666667, "success_rate.epoch.env.science": 0.5953678474114441, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5315685167628058, "success_rate.epoch.global": 0.5553587245349867, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9971897482014388, "tokens_p.mean_below_band": 1.1874362826347351e-08, "tokens_p.mean_in_band": 0.5916466346153846, "tokens_rate.above_band": 0.952054794520548, "tokens_rate.below_band": 0.003424657534246575, "tokens_rate.in_band": 0.04452054794520548 }, { "epoch": 0.09890292553191489, "grad_norm": 310.7065388890588, "learning_rate": 1.9991160499886725e-07, "loss": 1.0951, "step": 595, "success_rate.epoch.env.abd": 0.36363636363636365, "success_rate.epoch.env.agentgym:alfworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.31386861313868614, "success_rate.epoch.env.math": 0.8467741935483871, "success_rate.epoch.env.sat": 0.10416666666666667, "success_rate.epoch.env.science": 0.5926928281461434, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5291455482479678, "success_rate.epoch.global": 0.553116769095698, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.2, "success_rate.window.env_macro_mean": 0.21666666666666667, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9921995130702204, "tokens_p.mean_below_band": 1.195439836010337e-07, "tokens_p.mean_in_band": 0.5019733297413793, "tokens_rate.above_band": 0.8702051739518287, "tokens_rate.below_band": 0.00044603033006244426, "tokens_rate.in_band": 0.12934879571810884 }, { "epoch": 0.09973404255319149, "grad_norm": 258.0866267581683, "learning_rate": 1.9990908506903711e-07, "loss": 1.1214, "step": 600, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.30714285714285716, "success_rate.epoch.env.math": 0.8333333333333334, "success_rate.epoch.env.sat": 0.10204081632653061, "success_rate.epoch.env.science": 0.5929919137466307, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.531967086618856, "success_rate.epoch.global": 0.5509138381201044, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9963216145833333, "tokens_p.mean_below_band": 8.334609447047114e-09, "tokens_p.mean_in_band": 0.5324629934210526, "tokens_rate.above_band": 0.9082308420056765, "tokens_rate.below_band": 0.001892147587511826, "tokens_rate.in_band": 0.08987701040681173 }, { "epoch": 0.10056515957446809, "grad_norm": 245.00206467563692, "learning_rate": 1.9990652980545854e-07, "loss": 1.1973, "step": 605, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.30714285714285716, "success_rate.epoch.env.math": 0.8372093023255814, "success_rate.epoch.env.sat": 0.10204081632653061, "success_rate.epoch.env.science": 0.5909090909090909, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5321300999056476, "success_rate.epoch.global": 0.5509499136442142, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.2857142857142857, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9851804123711341, "tokens_p.mean_in_band": 0.6308159722222222, "tokens_rate.above_band": 0.6830985915492958, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.31690140845070425 }, { "epoch": 0.10139627659574468, "grad_norm": 392.3265895886073, "learning_rate": 1.9990393921266642e-07, "loss": 1.1094, "step": 610, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.3125, "success_rate.epoch.env.math": 0.8307692307692308, "success_rate.epoch.env.sat": 0.10204081632653061, "success_rate.epoch.env.science": 0.589095744680851, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.577321347639516, "success_rate.epoch.global": 0.5496575342465754, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.4375, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9981095967139656, "tokens_p.mean_below_band": 3.268496584496461e-13, "tokens_p.mean_in_band": 0.5858808876811594, "tokens_rate.above_band": 0.950319375443577, "tokens_rate.below_band": 0.0007097232079489, "tokens_rate.in_band": 0.048970901348474094 }, { "epoch": 0.10222739361702128, "grad_norm": 196.3933602505688, "learning_rate": 1.999013132952584e-07, "loss": 1.3064, "step": 615, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.3082191780821918, "success_rate.epoch.env.math": 0.8307692307692308, "success_rate.epoch.env.sat": 0.10204081632653061, "success_rate.epoch.env.science": 0.5910290237467019, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5771079346529745, "success_rate.epoch.global": 0.5501700680272109, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.99125, "tokens_p.mean_in_band": 0.6725431743421053, "tokens_rate.above_band": 0.8555133079847909, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1444866920152091 }, { "epoch": 0.10305851063829788, "grad_norm": 253.0735143574712, "learning_rate": 1.9989865205789493e-07, "loss": 1.3374, "step": 620, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.30612244897959184, "success_rate.epoch.env.math": 0.8257575757575758, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.5910878112712975, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5762815334787755, "success_rate.epoch.global": 0.549367088607595, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9961053412462908, "tokens_p.mean_below_band": 2.455635694786906e-10, "tokens_p.mean_in_band": 0.5500600961538461, "tokens_rate.above_band": 0.9108108108108108, "tokens_rate.below_band": 0.0013513513513513514, "tokens_rate.in_band": 0.08783783783783784 }, { "epoch": 0.10388962765957446, "grad_norm": 328.8627665120471, "learning_rate": 1.99895955505299e-07, "loss": 1.0881, "step": 625, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.3087248322147651, "success_rate.epoch.env.math": 0.8270676691729323, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.5903771131339401, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5765726042527002, "success_rate.epoch.global": 0.5494137353433836, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.611111111111111, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9981276747503567, "tokens_p.mean_in_band": 0.5607540246212122, "tokens_rate.above_band": 0.9550408719346049, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04495912806539509 }, { "epoch": 0.10472074468085106, "grad_norm": 788.3542226077053, "learning_rate": 1.9989322364225632e-07, "loss": 1.0327, "step": 630, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.3087248322147651, "success_rate.epoch.env.math": 0.8235294117647058, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.5891472868217055, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.562502778459931, "success_rate.epoch.global": 0.5486284289276808, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.38888888888888884, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9920990566037736, "tokens_p.mean_in_band": 0.55828125, "tokens_rate.above_band": 0.9137931034482759, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08620689655172414 }, { "epoch": 0.10555186170212766, "grad_norm": 166.99230029427298, "learning_rate": 1.998904564736153e-07, "loss": 1.0952, "step": 635, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3488372093023256, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.30666666666666664, "success_rate.epoch.env.math": 0.8260869565217391, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.5879332477535302, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5638472526517266, "success_rate.epoch.global": 0.5486798679867987, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9992690058479532, "tokens_p.mean_in_band": 0.4449677900834517, "tokens_rate.above_band": 0.9283387622149837, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07166123778501629 }, { "epoch": 0.10638297872340426, "grad_norm": 412.1973337202069, "learning_rate": 1.99887654004287e-07, "loss": 1.4361, "step": 640, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3488372093023256, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.30666666666666664, "success_rate.epoch.env.math": 0.8297872340425532, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.5867346938775511, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.563725031724334, "success_rate.epoch.global": 0.5482815057283142, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.4666666666666666, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.64046875, "tokens_rate.above_band": 0.8113207547169812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18867924528301888 }, { "epoch": 0.10721409574468085, "grad_norm": 667.891341906257, "learning_rate": 1.9988481623924508e-07, "loss": 1.3169, "step": 645, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.3488372093023256, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.30666666666666664, "success_rate.epoch.env.math": 0.8251748251748252, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.5848101265822785, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5631307611640612, "success_rate.epoch.global": 0.5471544715447154, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.30952380952380953, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9875, "tokens_p.mean_in_band": 0.6109470274390244, "tokens_rate.above_band": 0.7602339181286549, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.23976608187134502 }, { "epoch": 0.10804521276595745, "grad_norm": 206.94143675927717, "learning_rate": 1.9988194318352588e-07, "loss": 1.1671, "step": 650, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.36363636363636365, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.31125827814569534, "success_rate.epoch.env.math": 0.8251748251748252, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.5846925972396487, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5648828735704646, "success_rate.epoch.global": 0.5480225988700564, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6904761904761904, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9978407663316583, "tokens_p.mean_in_band": 0.5024801587301587, "tokens_rate.above_band": 0.9266589057043073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07334109429569266 }, { "epoch": 0.10887632978723404, "grad_norm": 298.63004928229986, "learning_rate": 1.9987903484222842e-07, "loss": 1.0999, "step": 655, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.37777777777777777, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.31125827814569534, "success_rate.epoch.env.math": 0.8251748251748252, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.5856079404466501, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5662516696930479, "success_rate.epoch.global": 0.5492393915132106, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9943862275449101, "tokens_p.mean_in_band": 0.53623046875, "tokens_rate.above_band": 0.893048128342246, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10695187165775401 }, { "epoch": 0.10970744680851063, "grad_norm": 216.32593376010945, "learning_rate": 1.9987609122051422e-07, "loss": 1.4122, "step": 660, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.37777777777777777, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.3157894736842105, "success_rate.epoch.env.math": 0.8275862068965517, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.5862068965517241, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5669372636353492, "success_rate.epoch.global": 0.5508744038155803, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.98875, "tokens_p.mean_in_band": 0.6669100806826637, "tokens_rate.above_band": 0.8928571428571429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10714285714285714 }, { "epoch": 0.11053856382978723, "grad_norm": 282.3076004392478, "learning_rate": 1.9987311232360755e-07, "loss": 1.0588, "step": 665, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.37777777777777777, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.3137254901960784, "success_rate.epoch.env.math": 0.8299319727891157, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.5845588235294118, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5668130553973602, "success_rate.epoch.global": 0.5501976284584981, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.4761904761904762, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.99935413354531, "tokens_p.mean_below_band": 1.0477378964424133e-08, "tokens_p.mean_in_band": 0.54482421875, "tokens_rate.above_band": 0.9388059701492537, "tokens_rate.below_band": 0.0014925373134328358, "tokens_rate.in_band": 0.05970149253731343 }, { "epoch": 0.11136968085106383, "grad_norm": 125.76401939853064, "learning_rate": 1.9987009815679518e-07, "loss": 0.9728, "step": 670, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.37777777777777777, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.31210191082802546, "success_rate.epoch.env.math": 0.831081081081081, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.5848595848595849, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.566797263602277, "success_rate.epoch.global": 0.5498821681068342, "success_rate.window.env.logic": 0.2, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.65, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 1.0005815932818882, "tokens_p.mean_in_band": 0.5477594339622641, "tokens_rate.above_band": 0.9540926808142053, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045907319185794715 }, { "epoch": 0.11220079787234043, "grad_norm": 484.3191205688296, "learning_rate": 1.9986704872542656e-07, "loss": 1.1652, "step": 675, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.391304347826087, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.310126582278481, "success_rate.epoch.env.math": 0.831081081081081, "success_rate.epoch.env.sat": 0.09433962264150944, "success_rate.epoch.env.science": 0.5861650485436893, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5663348499125013, "success_rate.epoch.global": 0.5499219968798752, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.3666666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9940380921895007, "tokens_p.mean_in_band": 0.6952123397435898, "tokens_rate.above_band": 0.9524390243902439, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0475609756097561 }, { "epoch": 0.11303191489361702, "grad_norm": 406.6131034139839, "learning_rate": 1.9986396403491364e-07, "loss": 1.0301, "step": 680, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.391304347826087, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.3167701863354037, "success_rate.epoch.env.math": 0.8322147651006712, "success_rate.epoch.env.sat": 0.09433962264150944, "success_rate.epoch.env.science": 0.586248492159228, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5684268723528255, "success_rate.epoch.global": 0.5510835913312694, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.994873046875, "tokens_p.mean_in_band": 0.6511627906976745, "tokens_rate.above_band": 0.965845909451946, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03415409054805401 }, { "epoch": 0.11386303191489362, "grad_norm": 573.8555410692554, "learning_rate": 1.99860844090731e-07, "loss": 1.1237, "step": 685, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.391304347826087, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.3148148148148148, "success_rate.epoch.env.math": 0.8266666666666667, "success_rate.epoch.env.sat": 0.09433962264150944, "success_rate.epoch.env.science": 0.5885167464114832, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5665735331842934, "success_rate.epoch.global": 0.5514592933947773, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.21428571428571427, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9940596562184024, "tokens_p.mean_below_band": 1.2945383787155151e-07, "tokens_p.mean_in_band": 0.5166233519269777, "tokens_rate.above_band": 0.8887890361716468, "tokens_rate.below_band": 0.00044933722758930576, "tokens_rate.in_band": 0.11076162660076387 }, { "epoch": 0.1146941489361702, "grad_norm": 318.3039503873906, "learning_rate": 1.9985768889841581e-07, "loss": 1.1151, "step": 690, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.391304347826087, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4857142857142857, "success_rate.epoch.env.logic": 0.3148148148148148, "success_rate.epoch.env.math": 0.8157894736842105, "success_rate.epoch.env.sat": 0.09259259259259259, "success_rate.epoch.env.science": 0.5890736342042755, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5641778014093575, "success_rate.epoch.global": 0.5503048780487805, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.16666666666666666, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9870037807183365, "tokens_p.mean_in_band": 0.5964645127118644, "tokens_rate.above_band": 0.6915032679738562, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.3084967320261438 }, { "epoch": 0.1155252659574468, "grad_norm": 263.34531958739285, "learning_rate": 1.9985449846356773e-07, "loss": 1.1111, "step": 695, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.391304347826087, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4857142857142857, "success_rate.epoch.env.logic": 0.3148148148148148, "success_rate.epoch.env.math": 0.8181818181818182, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.589622641509434, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5642921514201997, "success_rate.epoch.global": 0.5510976532929599, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9908088235294118, "tokens_p.mean_in_band": 0.5995762711864406, "tokens_rate.above_band": 0.7216981132075472, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2783018867924528 }, { "epoch": 0.1163563829787234, "grad_norm": 165.3867424905044, "learning_rate": 1.99851272791849e-07, "loss": 1.0931, "step": 700, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.391304347826087, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4857142857142857, "success_rate.epoch.env.logic": 0.31901840490797545, "success_rate.epoch.env.math": 0.8181818181818182, "success_rate.epoch.env.sat": 0.08928571428571429, "success_rate.epoch.env.science": 0.5876168224299065, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5643443690920412, "success_rate.epoch.global": 0.549962434259955, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.375, "success_rate.window.env_macro_mean": 0.4583333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9897959183673469, "tokens_p.mean_in_band": 0.5998391544117647, "tokens_rate.above_band": 0.7424242424242424, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.25757575757575757 }, { "epoch": 0.1171875, "grad_norm": 184.36476903608798, "learning_rate": 1.9984801188898444e-07, "loss": 0.9731, "step": 705, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.391304347826087, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4857142857142857, "success_rate.epoch.env.logic": 0.31901840490797545, "success_rate.epoch.env.math": 0.8141025641025641, "success_rate.epoch.env.sat": 0.08928571428571429, "success_rate.epoch.env.science": 0.5879629629629629, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5640049951332959, "success_rate.epoch.global": 0.5503355704697986, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.984375, "tokens_p.mean_below_band": 5.029141902923584e-07, "tokens_p.mean_in_band": 0.677734375, "tokens_rate.above_band": 0.7194244604316546, "tokens_rate.below_band": 0.007194244604316547, "tokens_rate.in_band": 0.2733812949640288 }, { "epoch": 0.1180186170212766, "grad_norm": 303.5670290657248, "learning_rate": 1.9984471576076126e-07, "loss": 1.1558, "step": 710, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4722222222222222, "success_rate.epoch.env.logic": 0.3170731707317073, "success_rate.epoch.env.math": 0.8141025641025641, "success_rate.epoch.env.sat": 0.08771929824561403, "success_rate.epoch.env.science": 0.5873563218390805, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5635814148142537, "success_rate.epoch.global": 0.5492227979274611, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.3, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9923387096774193, "tokens_p.mean_in_band": 0.561494140625, "tokens_rate.above_band": 0.8559423769507803, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14405762304921968 }, { "epoch": 0.1188497340425532, "grad_norm": 130.78914744203175, "learning_rate": 1.9984138441302937e-07, "loss": 1.1692, "step": 715, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4722222222222222, "success_rate.epoch.env.logic": 0.3170731707317073, "success_rate.epoch.env.math": 0.8164556962025317, "success_rate.epoch.env.sat": 0.08771929824561403, "success_rate.epoch.env.science": 0.5892448512585813, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5639670204069327, "success_rate.epoch.global": 0.5512159174649963, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9988790463692039, "tokens_p.mean_in_band": 0.5020616319444444, "tokens_rate.above_band": 0.9407407407407408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05925925925925926 }, { "epoch": 0.1196808510638298, "grad_norm": 332.3820616116317, "learning_rate": 1.9983801785170108e-07, "loss": 1.0918, "step": 720, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.32335329341317365, "success_rate.epoch.env.math": 0.8113207547169812, "success_rate.epoch.env.sat": 0.08771929824561403, "success_rate.epoch.env.science": 0.5899772209567198, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.565434458148598, "success_rate.epoch.global": 0.5519765739385066, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.5666666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9972903437705277, "tokens_p.mean_below_band": 2.1736923372372985e-10, "tokens_p.mean_in_band": 0.653169014084507, "tokens_rate.above_band": 0.9844794136667385, "tokens_rate.below_band": 0.0002155636990730761, "tokens_rate.in_band": 0.015305022634188403 }, { "epoch": 0.12051196808510638, "grad_norm": 187.4079827073579, "learning_rate": 1.9983461608275117e-07, "loss": 1.472, "step": 725, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.32142857142857145, "success_rate.epoch.env.math": 0.8125, "success_rate.epoch.env.sat": 0.08620689655172414, "success_rate.epoch.env.science": 0.5898305084745763, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5652158589779056, "success_rate.epoch.global": 0.5515988372093024, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.39285714285714285, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.6453269675925926, "tokens_rate.above_band": 0.8280254777070064, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17197452229299362 }, { "epoch": 0.12134308510638298, "grad_norm": 173.24380997138368, "learning_rate": 1.9983117911221701e-07, "loss": 1.0742, "step": 730, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.3235294117647059, "success_rate.epoch.env.math": 0.8136645962732919, "success_rate.epoch.env.sat": 0.08620689655172414, "success_rate.epoch.env.science": 0.5898876404494382, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5503663955158712, "success_rate.epoch.global": 0.551624548736462, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9964918759231906, "tokens_p.mean_in_band": 0.6041434151785714, "tokens_rate.above_band": 0.9602836879432625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03971631205673759 }, { "epoch": 0.12217420212765957, "grad_norm": 1180.96945143047, "learning_rate": 1.9982770694619835e-07, "loss": 1.0098, "step": 735, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.3216374269005848, "success_rate.epoch.env.math": 0.8136645962732919, "success_rate.epoch.env.sat": 0.08620689655172414, "success_rate.epoch.env.science": 0.5886287625418061, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5500799534457118, "success_rate.epoch.global": 0.5506101938262742, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.4444444444444444, "tokens_p.mean_above_band": 0.9984152139461173, "tokens_p.mean_below_band": 1.5688783605583012e-11, "tokens_p.mean_in_band": 0.5814416273584906, "tokens_rate.above_band": 0.9211678832116789, "tokens_rate.below_band": 0.00145985401459854, "tokens_rate.in_band": 0.07737226277372262 }, { "epoch": 0.12300531914893617, "grad_norm": 310.7366634118928, "learning_rate": 1.998241995908574e-07, "loss": 1.0865, "step": 740, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.3236994219653179, "success_rate.epoch.env.math": 0.8148148148148148, "success_rate.epoch.env.sat": 0.0847457627118644, "success_rate.epoch.env.science": 0.5893451720310766, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5503042706507725, "success_rate.epoch.global": 0.5510349750178444, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9961260330578512, "tokens_p.mean_in_band": 0.60375, "tokens_rate.above_band": 0.9355670103092784, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06443298969072164 }, { "epoch": 0.12383643617021277, "grad_norm": 143.88637664778358, "learning_rate": 1.9982065705241887e-07, "loss": 0.9834, "step": 745, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.3236994219653179, "success_rate.epoch.env.math": 0.8181818181818182, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.5900552486187846, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5505465116122433, "success_rate.epoch.global": 0.5521646557842441, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9922399328859061, "tokens_p.mean_in_band": 0.5324622844827587, "tokens_rate.above_band": 0.8370786516853933, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16292134831460675 }, { "epoch": 0.12466755319148937, "grad_norm": 250.25555079610672, "learning_rate": 1.9981707933716993e-07, "loss": 1.2934, "step": 750, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.3218390804597701, "success_rate.epoch.env.math": 0.8192771084337349, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.5866228070175439, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.504710375898164, "success_rate.epoch.global": 0.5496828752642706, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.14285714285714285, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.2857142857142857, "success_rate.window.global": 0.2, "tokens_p.mean_above_band": 0.9961927020506635, "tokens_p.mean_in_band": 0.5739683493589743, "tokens_rate.above_band": 0.9140022050716649, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08599779492833518 }, { "epoch": 0.12549867021276595, "grad_norm": 441.32161997729486, "learning_rate": 1.9981346645146013e-07, "loss": 1.0241, "step": 755, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.40425531914893614, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.32571428571428573, "success_rate.epoch.env.math": 0.8214285714285714, "success_rate.epoch.env.sat": 0.08196721311475409, "success_rate.epoch.env.science": 0.5860566448801743, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5050825927975644, "success_rate.epoch.global": 0.5500349895031491, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9923155737704918, "tokens_p.mean_in_band": 0.6250831117021277, "tokens_rate.above_band": 0.8384879725085911, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16151202749140894 }, { "epoch": 0.12632978723404256, "grad_norm": 476.299462569904, "learning_rate": 1.9980981840170147e-07, "loss": 1.0682, "step": 760, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.32954545454545453, "success_rate.epoch.env.math": 0.8224852071005917, "success_rate.epoch.env.sat": 0.08196721311475409, "success_rate.epoch.env.science": 0.5865800865800865, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5067028285903672, "success_rate.epoch.global": 0.5514603616133519, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.8928571428571428, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9942307692307693, "tokens_p.mean_in_band": 0.7045641447368421, "tokens_rate.above_band": 0.9647495361781077, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03525046382189239 }, { "epoch": 0.12716090425531915, "grad_norm": 336.57319710425196, "learning_rate": 1.9980613519436833e-07, "loss": 1.1062, "step": 765, "success_rate.epoch.env.abd": 0.4166666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.327683615819209, "success_rate.epoch.env.math": 0.8245614035087719, "success_rate.epoch.env.sat": 0.08196721311475409, "success_rate.epoch.env.science": 0.5862068965517241, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5066883892861465, "success_rate.epoch.global": 0.5515570934256055, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.6333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9925204918032787, "tokens_p.mean_in_band": 0.6488850911458334, "tokens_rate.above_band": 0.8640226628895185, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1359773371104816 }, { "epoch": 0.12799202127659576, "grad_norm": 211.09462540375193, "learning_rate": 1.998024168359975e-07, "loss": 1.0549, "step": 770, "success_rate.epoch.env.abd": 0.38461538461538464, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.327683615819209, "success_rate.epoch.env.math": 0.8245614035087719, "success_rate.epoch.env.sat": 0.08196721311475409, "success_rate.epoch.env.science": 0.5859124866595518, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5037478718367416, "success_rate.epoch.global": 0.5512027491408935, "success_rate.window.env.abd": 0.0, "success_rate.window.env.science": 0.5555555555555556, "success_rate.window.env_macro_mean": 0.2777777777777778, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9906496062992126, "tokens_p.mean_below_band": 4.843059286940843e-11, "tokens_p.mean_in_band": 0.33334294280442806, "tokens_rate.above_band": 0.5834609494640123, "tokens_rate.below_band": 0.0015313935681470138, "tokens_rate.in_band": 0.41500765696784075 }, { "epoch": 0.12882313829787234, "grad_norm": 248.41363173656077, "learning_rate": 1.9979866333318816e-07, "loss": 1.1377, "step": 775, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4864864864864865, "success_rate.epoch.env.logic": 0.3258426966292135, "success_rate.epoch.env.math": 0.8197674418604651, "success_rate.epoch.env.sat": 0.08064516129032258, "success_rate.epoch.env.science": 0.5881104033970276, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5007268224369429, "success_rate.epoch.global": 0.5512295081967213, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.16666666666666669, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9977104755784062, "tokens_p.mean_in_band": 0.3172961353550296, "tokens_rate.above_band": 0.6971326164874552, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.30286738351254483 }, { "epoch": 0.12965425531914893, "grad_norm": 259.3579126149414, "learning_rate": 1.9979487469260186e-07, "loss": 0.8702, "step": 780, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47368421052631576, "success_rate.epoch.env.logic": 0.3258426966292135, "success_rate.epoch.env.math": 0.8218390804597702, "success_rate.epoch.env.sat": 0.08064516129032258, "success_rate.epoch.env.science": 0.5848261327713382, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49945273989271066, "success_rate.epoch.global": 0.5495251017639078, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.14285714285714285, "success_rate.window.env_macro_mean": 0.38095238095238093, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9923336988304093, "tokens_p.mean_below_band": 1.525040715932846e-08, "tokens_p.mean_in_band": 0.5209904813218391, "tokens_rate.above_band": 0.8673598782652803, "tokens_rate.below_band": 0.000253613999492772, "tokens_rate.in_band": 0.13238650773522698 }, { "epoch": 0.13048537234042554, "grad_norm": 458.49301481412226, "learning_rate": 1.997910509209625e-07, "loss": 1.2074, "step": 785, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47368421052631576, "success_rate.epoch.env.logic": 0.3258426966292135, "success_rate.epoch.env.math": 0.8228571428571428, "success_rate.epoch.env.sat": 0.08064516129032258, "success_rate.epoch.env.science": 0.5851619644723093, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4995758211743783, "success_rate.epoch.global": 0.550236008091706, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5555555555555556, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9866504854368932, "tokens_p.mean_in_band": 0.5775240384615384, "tokens_rate.above_band": 0.7984496124031008, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.20155038759689922 }, { "epoch": 0.13131648936170212, "grad_norm": 384.2247557940492, "learning_rate": 1.9978719202505634e-07, "loss": 1.3276, "step": 790, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47368421052631576, "success_rate.epoch.env.logic": 0.329608938547486, "success_rate.epoch.env.math": 0.8202247191011236, "success_rate.epoch.env.sat": 0.08064516129032258, "success_rate.epoch.env.science": 0.5848074921956296, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4996466708003395, "success_rate.epoch.global": 0.5506371562709591, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.7833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9902777777777778, "tokens_p.mean_in_band": 0.5003255208333334, "tokens_rate.above_band": 0.8823529411764706, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11764705882352941 }, { "epoch": 0.13214760638297873, "grad_norm": 263.13375618019256, "learning_rate": 1.9978329801173197e-07, "loss": 0.9061, "step": 795, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47368421052631576, "success_rate.epoch.env.logic": 0.32967032967032966, "success_rate.epoch.env.math": 0.8222222222222222, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.5865284974093264, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4998739269396482, "success_rate.epoch.global": 0.5516322451698867, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9979990118577076, "tokens_p.mean_in_band": 0.6232142857142857, "tokens_rate.above_band": 0.947565543071161, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.052434456928838954 }, { "epoch": 0.13297872340425532, "grad_norm": 424.0257319612182, "learning_rate": 1.9977936888790032e-07, "loss": 1.2945, "step": 800, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.33152173913043476, "success_rate.epoch.env.math": 0.8222222222222222, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.5876288659793815, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.501369113729042, "success_rate.epoch.global": 0.5526838966202783, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958091341077085, "tokens_p.mean_below_band": 6.007030606269836e-08, "tokens_p.mean_in_band": 0.5141872829861112, "tokens_rate.above_band": 0.9623983739837398, "tokens_rate.below_band": 0.0010162601626016261, "tokens_rate.in_band": 0.036585365853658534 }, { "epoch": 0.13380984042553193, "grad_norm": 158.49781613193912, "learning_rate": 1.9977540466053465e-07, "loss": 1.133, "step": 805, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.33152173913043476, "success_rate.epoch.env.math": 0.8241758241758241, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.5870901639344263, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5014977409934643, "success_rate.epoch.global": 0.5530652603823335, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.6666666666666666, "tokens_p.mean_above_band": 0.9823069852941176, "tokens_p.mean_in_band": 0.5409458705357143, "tokens_rate.above_band": 0.8662420382165605, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1337579617834395 }, { "epoch": 0.1346409574468085, "grad_norm": 228.55388713767965, "learning_rate": 1.9977140533667043e-07, "loss": 1.1052, "step": 810, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.33152173913043476, "success_rate.epoch.env.math": 0.827027027027027, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.5845213849287169, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5015234158885092, "success_rate.epoch.global": 0.5524246395806028, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.2857142857142857, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9925742574257426, "tokens_p.mean_in_band": 0.5675048828125, "tokens_rate.above_band": 0.7593984962406015, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.24060150375939848 }, { "epoch": 0.1354720744680851, "grad_norm": 187.5224832466495, "learning_rate": 1.9976737092340552e-07, "loss": 0.8616, "step": 815, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 0.8297872340425532, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.5866261398176292, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5021303755346308, "success_rate.epoch.global": 0.5546875, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916845493562232, "tokens_p.mean_in_band": 0.755859375, "tokens_rate.above_band": 0.9173228346456693, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08267716535433071 }, { "epoch": 0.1363031914893617, "grad_norm": 238.9270160310689, "learning_rate": 1.9976330142790002e-07, "loss": 1.0656, "step": 820, "success_rate.epoch.env.abd": 0.35714285714285715, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.33689839572192515, "success_rate.epoch.env.math": 0.8272251308900523, "success_rate.epoch.env.sat": 0.078125, "success_rate.epoch.env.science": 0.5866935483870968, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5021149472110381, "success_rate.epoch.global": 0.5549805950840879, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.5666666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9918193717277487, "tokens_p.mean_in_band": 0.60546875, "tokens_rate.above_band": 0.8603603603603603, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13963963963963963 }, { "epoch": 0.1371343085106383, "grad_norm": 152.62611687821536, "learning_rate": 1.9975919685737626e-07, "loss": 0.9879, "step": 825, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.3404255319148936, "success_rate.epoch.env.math": 0.828125, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.5867602808425276, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5055372704999491, "success_rate.epoch.global": 0.5552699228791774, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9945750452079566, "tokens_p.mean_in_band": 0.6637290396341463, "tokens_rate.above_band": 0.930976430976431, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06902356902356903 }, { "epoch": 0.1379654255319149, "grad_norm": 197.37937153873247, "learning_rate": 1.9975505721911885e-07, "loss": 1.2908, "step": 830, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.3386243386243386, "success_rate.epoch.env.math": 0.8238341968911918, "success_rate.epoch.env.sat": 0.07575757575757576, "success_rate.epoch.env.science": 0.5872382851445663, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5038137268233752, "success_rate.epoch.global": 0.5542784163473818, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.13333333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9920258620689655, "tokens_p.mean_in_band": 0.6602233420801527, "tokens_rate.above_band": 0.8856893542757417, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1143106457242583 }, { "epoch": 0.1387965425531915, "grad_norm": 168.67250919631232, "learning_rate": 1.9975088252047466e-07, "loss": 0.8651, "step": 835, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.3386243386243386, "success_rate.epoch.env.math": 0.826530612244898, "success_rate.epoch.env.sat": 0.07575757575757576, "success_rate.epoch.env.science": 0.5881188118811881, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5041389033770414, "success_rate.epoch.global": 0.5558375634517766, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9878826530612245, "tokens_p.mean_in_band": 0.6030441810344828, "tokens_rate.above_band": 0.7716535433070866, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2283464566929134 }, { "epoch": 0.13962765957446807, "grad_norm": 236.92745478527485, "learning_rate": 1.9974667276885278e-07, "loss": 1.2177, "step": 840, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.3386243386243386, "success_rate.epoch.env.math": 0.8282828282828283, "success_rate.epoch.env.sat": 0.07462686567164178, "success_rate.epoch.env.science": 0.5895669291338582, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5043270509411021, "success_rate.epoch.global": 0.5570977917981073, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9927591463414634, "tokens_p.mean_in_band": 0.6500509510869565, "tokens_rate.above_band": 0.780952380952381, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21904761904761905 }, { "epoch": 0.14045877659574468, "grad_norm": 254.94563378569697, "learning_rate": 1.997424279717244e-07, "loss": 1.0805, "step": 845, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.3386243386243386, "success_rate.epoch.env.math": 0.8291457286432161, "success_rate.epoch.env.sat": 0.07462686567164178, "success_rate.epoch.env.science": 0.5888671875, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5043418835526047, "success_rate.epoch.global": 0.5570890840652447, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5555555555555556, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9879966887417219, "tokens_p.mean_in_band": 0.6565104166666667, "tokens_rate.above_band": 0.8342541436464088, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16574585635359115 }, { "epoch": 0.14128989361702127, "grad_norm": 214.58383700275985, "learning_rate": 1.9973814813662307e-07, "loss": 0.8665, "step": 850, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.3403141361256545, "success_rate.epoch.env.math": 0.8258706467661692, "success_rate.epoch.env.sat": 0.07462686567164178, "success_rate.epoch.env.science": 0.5904669260700389, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5055072773928855, "success_rate.epoch.global": 0.5583281347473488, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9921818510484454, "tokens_p.mean_in_band": 0.7228064903846154, "tokens_rate.above_band": 0.946611909650924, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053388090349075976 }, { "epoch": 0.14212101063829788, "grad_norm": 165.4617685728654, "learning_rate": 1.9973383327114441e-07, "loss": 0.8949, "step": 855, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.33678756476683935, "success_rate.epoch.env.math": 0.8267326732673267, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.5914811229428848, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5052574772209085, "success_rate.epoch.global": 0.5583126550868487, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.997221634208298, "tokens_p.mean_in_band": 0.5999710648148148, "tokens_rate.above_band": 0.9358161648177497, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06418383518225039 }, { "epoch": 0.14295212765957446, "grad_norm": 165.4439759297413, "learning_rate": 1.9972948338294628e-07, "loss": 1.1022, "step": 860, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.3384615384615385, "success_rate.epoch.env.math": 0.8275862068965517, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.5905587668593449, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5054034000609433, "success_rate.epoch.global": 0.5580246913580247, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.6428571428571429, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9907718120805369, "tokens_p.mean_in_band": 0.6482354525862069, "tokens_rate.above_band": 0.8370786516853933, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16292134831460675 }, { "epoch": 0.14378324468085107, "grad_norm": 376.50478028307185, "learning_rate": 1.9972509847974864e-07, "loss": 0.9464, "step": 865, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.3384615384615385, "success_rate.epoch.env.math": 0.8235294117647058, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.5904306220095694, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.505022950971705, "success_rate.epoch.global": 0.5577395577395577, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.986328125, "tokens_p.mean_below_band": 9.255018085241318e-09, "tokens_p.mean_in_band": 0.6807291666666667, "tokens_rate.above_band": 0.8228571428571428, "tokens_rate.below_band": 0.005714285714285714, "tokens_rate.in_band": 0.17142857142857143 }, { "epoch": 0.14461436170212766, "grad_norm": 590.7139396862939, "learning_rate": 1.9972067856933363e-07, "loss": 0.9558, "step": 870, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.3384615384615385, "success_rate.epoch.env.math": 0.8260869565217391, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.5904761904761905, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5042037429140096, "success_rate.epoch.global": 0.5583384239462431, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9877976190476191, "tokens_p.mean_in_band": 0.7350383254716981, "tokens_rate.above_band": 0.9224011713030746, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07759882869692533 }, { "epoch": 0.14544547872340424, "grad_norm": 356.7976976301709, "learning_rate": 1.9971622365954551e-07, "loss": 1.0622, "step": 875, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.34517766497461927, "success_rate.epoch.env.math": 0.8269230769230769, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.5909090909090909, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5049296654001295, "success_rate.epoch.global": 0.5595382746051033, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.8571428571428571, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9899655963302753, "tokens_p.mean_in_band": 0.5986578525641025, "tokens_rate.above_band": 0.8482490272373541, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1517509727626459 }, { "epoch": 0.14627659574468085, "grad_norm": 135.93263081715523, "learning_rate": 1.9971173375829065e-07, "loss": 1.1486, "step": 880, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.3434343434343434, "success_rate.epoch.env.math": 0.8229665071770335, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.5916824196597353, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5042908105936014, "success_rate.epoch.global": 0.5587167070217918, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.15, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.997431506849315, "tokens_p.mean_below_band": 4.6566128730773926e-09, "tokens_p.mean_in_band": 0.60859375, "tokens_rate.above_band": 0.9073446327683616, "tokens_rate.below_band": 0.0022598870056497176, "tokens_rate.in_band": 0.0903954802259887 }, { "epoch": 0.14710771276595744, "grad_norm": 198.0059643602478, "learning_rate": 1.9970720887353752e-07, "loss": 1.0517, "step": 885, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40816326530612246, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.3415841584158416, "success_rate.epoch.env.math": 0.8254716981132075, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.5915094339622642, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5043346306136196, "success_rate.epoch.global": 0.5586995785671283, "success_rate.window.env.logic": 0.25, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6388888888888888, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9962780355761794, "tokens_p.mean_in_band": 0.6142743644067796, "tokens_rate.above_band": 0.9563609467455622, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04363905325443787 }, { "epoch": 0.14793882978723405, "grad_norm": 151.92573701627512, "learning_rate": 1.9970264901331668e-07, "loss": 0.9846, "step": 890, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.3399014778325123, "success_rate.epoch.env.math": 0.822429906542056, "success_rate.epoch.env.sat": 0.07042253521126761, "success_rate.epoch.env.science": 0.5915492957746479, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5030751840803903, "success_rate.epoch.global": 0.5577498503889886, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.22000000000000003, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9970122739018088, "tokens_p.mean_in_band": 0.5871834590517241, "tokens_rate.above_band": 0.8696629213483146, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1303370786516854 }, { "epoch": 0.14876994680851063, "grad_norm": 265.5710421924225, "learning_rate": 1.996980541857208e-07, "loss": 0.8005, "step": 895, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.3431372549019608, "success_rate.epoch.env.math": 0.8232558139534883, "success_rate.epoch.env.sat": 0.06944444444444445, "success_rate.epoch.env.science": 0.5919701213818861, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5023870224622619, "success_rate.epoch.global": 0.5580011897679953, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9950149638084632, "tokens_p.mean_below_band": 7.188646122813225e-09, "tokens_p.mean_in_band": 0.5166774120145631, "tokens_rate.above_band": 0.8968789013732834, "tokens_rate.below_band": 0.00024968789013732833, "tokens_rate.in_band": 0.10287141073657928 }, { "epoch": 0.14960106382978725, "grad_norm": 1009.3990332159907, "learning_rate": 1.9969342439890452e-07, "loss": 1.2037, "step": 900, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.3431372549019608, "success_rate.epoch.env.math": 0.8202764976958525, "success_rate.epoch.env.sat": 0.06944444444444445, "success_rate.epoch.env.science": 0.5936920222634509, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5022727119735281, "success_rate.epoch.global": 0.5591715976331361, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.990946261682243, "tokens_p.mean_below_band": 2.648448571562767e-09, "tokens_p.mean_in_band": 0.6962425595238095, "tokens_rate.above_band": 0.8294573643410853, "tokens_rate.below_band": 0.007751937984496124, "tokens_rate.in_band": 0.16279069767441862 }, { "epoch": 0.15043218085106383, "grad_norm": 315.54852445723196, "learning_rate": 1.996887596610846e-07, "loss": 1.0878, "step": 905, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.34299516908212563, "success_rate.epoch.env.math": 0.817351598173516, "success_rate.epoch.env.sat": 0.0684931506849315, "success_rate.epoch.env.science": 0.5933456561922366, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5018759260487191, "success_rate.epoch.global": 0.558235294117647, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9994387755102041, "tokens_p.mean_in_band": 0.5933657786885246, "tokens_rate.above_band": 0.9525660964230172, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04743390357698289 }, { "epoch": 0.1512632978723404, "grad_norm": 114.86361275976537, "learning_rate": 1.9968405998053982e-07, "loss": 1.239, "step": 910, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.34299516908212563, "success_rate.epoch.env.math": 0.8198198198198198, "success_rate.epoch.env.sat": 0.06756756756756757, "success_rate.epoch.env.science": 0.594296228150874, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5021025815475897, "success_rate.epoch.global": 0.5593914569923932, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9897836538461539, "tokens_p.mean_in_band": 0.5973668981481481, "tokens_rate.above_band": 0.8524590163934426, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14754098360655737 }, { "epoch": 0.15209441489361702, "grad_norm": 300.2475570926112, "learning_rate": 1.9967932536561096e-07, "loss": 0.8637, "step": 915, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.3492822966507177, "success_rate.epoch.env.math": 0.8198198198198198, "success_rate.epoch.env.sat": 0.06756756756756757, "success_rate.epoch.env.science": 0.5954337899543379, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5027775533086858, "success_rate.epoch.global": 0.5607911576497964, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950592885375494, "tokens_p.mean_in_band": 0.6321022727272727, "tokens_rate.above_band": 0.8846153846153846, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11538461538461539 }, { "epoch": 0.1529255319148936, "grad_norm": 395.5290813945582, "learning_rate": 1.9967455582470078e-07, "loss": 0.8142, "step": 920, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.3443396226415094, "success_rate.epoch.env.math": 0.8198198198198198, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.5969062784349408, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5023801818150943, "success_rate.epoch.global": 0.5605095541401274, "success_rate.window.env.logic": 0.25, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4166666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9983179886685553, "tokens_p.mean_in_band": 0.576514175257732, "tokens_rate.above_band": 0.9479054779806659, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05209452201933405 }, { "epoch": 0.15375664893617022, "grad_norm": 325.8851761769983, "learning_rate": 1.9966975136627408e-07, "loss": 0.9141, "step": 925, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.3474178403755869, "success_rate.epoch.env.math": 0.820627802690583, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.5987318840579711, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5028994369267189, "success_rate.epoch.global": 0.5622837370242214, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9959124472573839, "tokens_p.mean_in_band": 0.6730057565789473, "tokens_rate.above_band": 0.8618181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13818181818181818 }, { "epoch": 0.1545877659574468, "grad_norm": 218.2941752833363, "learning_rate": 1.9966491199885754e-07, "loss": 0.9742, "step": 930, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40384615384615385, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.35046728971962615, "success_rate.epoch.env.math": 0.820627802690583, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.5987376014427412, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.503447084889674, "success_rate.epoch.global": 0.5622489959839357, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.997030998389694, "tokens_p.mean_in_band": 0.6196428571428572, "tokens_rate.above_band": 0.9466463414634146, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053353658536585365 }, { "epoch": 0.15541888297872342, "grad_norm": 231.46974310582704, "learning_rate": 1.9966003773103992e-07, "loss": 0.9837, "step": 935, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40384615384615385, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.35046728971962615, "success_rate.epoch.env.math": 0.8214285714285714, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.600358422939068, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5027062454087653, "success_rate.epoch.global": 0.5633561643835616, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9895220588235294, "tokens_p.mean_in_band": 0.5516921612394958, "tokens_rate.above_band": 0.8108108108108109, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1891891891891892 }, { "epoch": 0.15625, "grad_norm": 228.22652972071526, "learning_rate": 1.996551285714718e-07, "loss": 1.0574, "step": 940, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40384615384615385, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.35046728971962615, "success_rate.epoch.env.math": 0.8230088495575221, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.6014234875444839, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5029467311118895, "success_rate.epoch.global": 0.5646992054483542, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9824766355140186, "tokens_p.mean_below_band": 9.918585419654846e-08, "tokens_p.mean_in_band": 0.75390625, "tokens_rate.above_band": 0.8699186991869918, "tokens_rate.below_band": 0.008130081300813009, "tokens_rate.in_band": 0.12195121951219512 }, { "epoch": 0.15708111702127658, "grad_norm": 616.08098770576, "learning_rate": 1.9965018452886574e-07, "loss": 0.8494, "step": 945, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40384615384615385, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4444444444444444, "success_rate.epoch.env.logic": 0.3488372093023256, "success_rate.epoch.env.math": 0.8253275109170306, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.6019503546099291, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.502138952739766, "success_rate.epoch.global": 0.5652173913043478, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9980137711864406, "tokens_p.mean_below_band": 2.143982176979383e-09, "tokens_p.mean_in_band": 0.7151898734177216, "tokens_rate.above_band": 0.97736057426836, "tokens_rate.below_band": 0.0008282716731087797, "tokens_rate.in_band": 0.021811154058531197 }, { "epoch": 0.1579122340425532, "grad_norm": 274.1340082487527, "learning_rate": 1.9964520561199625e-07, "loss": 1.0499, "step": 950, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4444444444444444, "success_rate.epoch.env.logic": 0.3456221198156682, "success_rate.epoch.env.math": 0.8253275109170306, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.6019417475728155, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5011531858965311, "success_rate.epoch.global": 0.5643620011242271, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.16666666666666666, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9974349937343359, "tokens_p.mean_in_band": 0.3192830550541516, "tokens_rate.above_band": 0.8521089161772557, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14789108382274427 }, { "epoch": 0.15874335106382978, "grad_norm": 276.59972713042356, "learning_rate": 1.9964019182969965e-07, "loss": 1.042, "step": 955, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4444444444444444, "success_rate.epoch.env.logic": 0.3486238532110092, "success_rate.epoch.env.math": 0.8260869565217391, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.6012269938650306, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5014301336503734, "success_rate.epoch.global": 0.5645612073784237, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9913793103448276, "tokens_p.mean_in_band": 0.5730794270833334, "tokens_rate.above_band": 0.8787878787878788, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12121212121212122 }, { "epoch": 0.1595744680851064, "grad_norm": 292.3355189996635, "learning_rate": 1.9963514319087415e-07, "loss": 1.0451, "step": 960, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4444444444444444, "success_rate.epoch.env.logic": 0.3470319634703196, "success_rate.epoch.env.math": 0.8283261802575107, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.6010498687664042, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5014728799136875, "success_rate.epoch.global": 0.5649025069637883, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9996360989810772, "tokens_p.mean_in_band": 0.6727678571428571, "tokens_rate.above_band": 0.9515235457063712, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04847645429362881 }, { "epoch": 0.16040558510638298, "grad_norm": 226.28395497949296, "learning_rate": 1.996300597044799e-07, "loss": 1.0401, "step": 965, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.3470319634703196, "success_rate.epoch.env.math": 0.8247863247863247, "success_rate.epoch.env.sat": 0.06493506493506493, "success_rate.epoch.env.science": 0.6022628372497825, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5022816070908727, "success_rate.epoch.global": 0.565410199556541, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9897378177966102, "tokens_p.mean_in_band": 0.686141304347826, "tokens_rate.above_band": 0.9111969111969112, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0888030888030888 }, { "epoch": 0.1612367021276596, "grad_norm": 232.84025810415997, "learning_rate": 1.9962494137953883e-07, "loss": 1.0182, "step": 970, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.3470319634703196, "success_rate.epoch.env.math": 0.8235294117647058, "success_rate.epoch.env.sat": 0.06493506493506493, "success_rate.epoch.env.science": 0.6013864818024264, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5020876735936931, "success_rate.epoch.global": 0.5653612796469939, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.3833333333333333, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9948261589403974, "tokens_p.mean_below_band": 2.1047890186309814e-07, "tokens_p.mean_in_band": 0.5435697115384616, "tokens_rate.above_band": 0.937888198757764, "tokens_rate.below_band": 0.0015527950310559005, "tokens_rate.in_band": 0.06055900621118013 }, { "epoch": 0.16206781914893617, "grad_norm": 406.02213582225613, "learning_rate": 1.996197882251347e-07, "loss": 0.8577, "step": 975, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.35, "success_rate.epoch.env.math": 0.825, "success_rate.epoch.env.sat": 0.06493506493506493, "success_rate.epoch.env.science": 0.6008620689655172, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.502443511041699, "success_rate.epoch.global": 0.5658616904500549, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.993195564516129, "tokens_p.mean_in_band": 0.6822467672413793, "tokens_rate.above_band": 0.8953068592057761, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10469314079422383 }, { "epoch": 0.16289893617021275, "grad_norm": 242.50168808098468, "learning_rate": 1.996146002504131e-07, "loss": 0.9381, "step": 980, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.35135135135135137, "success_rate.epoch.env.math": 0.825, "success_rate.epoch.env.sat": 0.0641025641025641, "success_rate.epoch.env.science": 0.6018916595012898, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5025842784103012, "success_rate.epoch.global": 0.5661925601750547, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.275, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9987796373779637, "tokens_p.mean_below_band": 4.3655745685100555e-09, "tokens_p.mean_in_band": 0.5609580592105263, "tokens_rate.above_band": 0.9251612903225807, "tokens_rate.below_band": 0.0012903225806451613, "tokens_rate.in_band": 0.07354838709677419 }, { "epoch": 0.16373005319148937, "grad_norm": 121.89361802490136, "learning_rate": 1.9960937746458141e-07, "loss": 1.3233, "step": 985, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.34977578475336324, "success_rate.epoch.env.math": 0.8264462809917356, "success_rate.epoch.env.sat": 0.06329113924050633, "success_rate.epoch.env.science": 0.602224123182207, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5025289832478109, "success_rate.epoch.global": 0.5663764961915125, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9955420197740112, "tokens_p.mean_in_band": 0.637217420212766, "tokens_rate.above_band": 0.937748344370861, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06225165562913907 }, { "epoch": 0.16456117021276595, "grad_norm": 237.6764822308073, "learning_rate": 1.996041198769088e-07, "loss": 0.9872, "step": 990, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.34977578475336324, "success_rate.epoch.env.math": 0.8271604938271605, "success_rate.epoch.env.sat": 0.06329113924050633, "success_rate.epoch.env.science": 0.6015293118096856, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5025307470171657, "success_rate.epoch.global": 0.5663237682728749, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9861918604651163, "tokens_p.mean_in_band": 0.6541078629032258, "tokens_rate.above_band": 0.80625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19375 }, { "epoch": 0.16539228723404256, "grad_norm": 139.83144603807995, "learning_rate": 1.9959882749672624e-07, "loss": 0.8594, "step": 995, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.35267857142857145, "success_rate.epoch.env.math": 0.8237704918032787, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.6028789161727349, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5025372242693357, "success_rate.epoch.global": 0.5668824163969794, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9883373205741627, "tokens_p.mean_in_band": 0.5617357336956522, "tokens_rate.above_band": 0.8196078431372549, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1803921568627451 }, { "epoch": 0.16622340425531915, "grad_norm": 110.73587835971473, "learning_rate": 1.9959350033342633e-07, "loss": 1.0128, "step": 1000, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44680851063829785, "success_rate.epoch.env.logic": 0.35398230088495575, "success_rate.epoch.env.math": 0.8237704918032787, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.603201347935973, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5018020363354707, "success_rate.epoch.global": 0.5668276972624798, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.38888888888888884, "success_rate.window.global": 0.5555555555555556, "tokens_p.mean_above_band": 0.9904321598101266, "tokens_p.mean_in_band": 0.5934111015981736, "tokens_rate.above_band": 0.8523263654753878, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14767363452461227 }, { "epoch": 0.16705452127659576, "grad_norm": 193.38872491235324, "learning_rate": 1.9958813839646348e-07, "loss": 0.9668, "step": 1005, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39622641509433965, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4375, "success_rate.epoch.env.logic": 0.3508771929824561, "success_rate.epoch.env.math": 0.8244897959183674, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.6031879194630873, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5007376960719622, "success_rate.epoch.global": 0.5662393162393162, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9968643707482994, "tokens_p.mean_below_band": 3.3155083656311035e-07, "tokens_p.mean_in_band": 0.5047869694960212, "tokens_rate.above_band": 0.9156840934371524, "tokens_rate.below_band": 0.0004449388209121246, "tokens_rate.in_band": 0.08387096774193549 }, { "epoch": 0.16788563829787234, "grad_norm": 146.3020329942035, "learning_rate": 1.9958274169535392e-07, "loss": 0.8669, "step": 1010, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42857142857142855, "success_rate.epoch.env.logic": 0.34934497816593885, "success_rate.epoch.env.math": 0.8218623481781376, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.6036789297658863, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5006089477656483, "success_rate.epoch.global": 0.5661881977671451, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.42000000000000004, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9983982491753363, "tokens_p.mean_below_band": 3.688037395477295e-07, "tokens_p.mean_in_band": 0.568425422705314, "tokens_rate.above_band": 0.9498674379368522, "tokens_rate.below_band": 0.00024102193299590263, "tokens_rate.in_band": 0.049891540130151846 }, { "epoch": 0.16871675531914893, "grad_norm": 399.96006466207245, "learning_rate": 1.9957731023967538e-07, "loss": 1.0633, "step": 1015, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42857142857142855, "success_rate.epoch.env.logic": 0.35064935064935066, "success_rate.epoch.env.math": 0.8192771084337349, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.6025, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.500385329854114, "success_rate.epoch.global": 0.5653785071466384, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.16666666666666666, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9959720254957507, "tokens_p.mean_in_band": 0.5634548611111111, "tokens_rate.above_band": 0.9400798934753661, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05992010652463382 }, { "epoch": 0.16954787234042554, "grad_norm": 123.52066398196166, "learning_rate": 1.9957184403906738e-07, "loss": 0.8311, "step": 1020, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42, "success_rate.epoch.env.logic": 0.35064935064935066, "success_rate.epoch.env.math": 0.8207171314741036, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.6044776119402985, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4999168031640448, "success_rate.epoch.global": 0.5669125395152792, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9859549409061064, "tokens_p.mean_in_band": 0.5458587398373984, "tokens_rate.above_band": 0.7796263117481443, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.22037368825185563 }, { "epoch": 0.17037898936170212, "grad_norm": 134.20513569651126, "learning_rate": 1.995663431032311e-07, "loss": 1.0981, "step": 1025, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42, "success_rate.epoch.env.logic": 0.35344827586206895, "success_rate.epoch.env.math": 0.8207171314741036, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.6044591246903386, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4994961695781676, "success_rate.epoch.global": 0.5669291338582677, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.994140625, "tokens_p.mean_in_band": 0.5643643465909091, "tokens_rate.above_band": 0.9385474860335196, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061452513966480445 }, { "epoch": 0.17121010638297873, "grad_norm": 165.92382388904176, "learning_rate": 1.9956080744192937e-07, "loss": 0.8825, "step": 1030, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42, "success_rate.epoch.env.logic": 0.358974358974359, "success_rate.epoch.env.math": 0.8207171314741036, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.6049382716049383, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.499971953677133, "success_rate.epoch.global": 0.5674686192468619, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.5238095238095238, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9903935185185185, "tokens_p.mean_in_band": 0.6736971227134146, "tokens_rate.above_band": 0.8681672025723473, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13183279742765272 }, { "epoch": 0.17204122340425532, "grad_norm": 155.7053428936629, "learning_rate": 1.995552370649866e-07, "loss": 1.1059, "step": 1035, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42, "success_rate.epoch.env.logic": 0.3559322033898305, "success_rate.epoch.env.math": 0.8207171314741036, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.6072013093289689, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49990112478072407, "success_rate.epoch.global": 0.5684539302446643, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.4375, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9987157534246576, "tokens_p.mean_in_band": 0.6636202830188679, "tokens_rate.above_band": 0.9323116219667944, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06768837803320563 }, { "epoch": 0.17287234042553193, "grad_norm": 155.65767786776823, "learning_rate": 1.9954963198228883e-07, "loss": 0.8423, "step": 1040, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42, "success_rate.epoch.env.logic": 0.3559322033898305, "success_rate.epoch.env.math": 0.8181818181818182, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.6083061889250815, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4997710853537993, "success_rate.epoch.global": 0.5692068429237948, "success_rate.window.env.abd": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4523809523809524, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9857519157088123, "tokens_p.mean_in_band": 0.46028343023255813, "tokens_rate.above_band": 0.5483193277310925, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.45168067226890757 }, { "epoch": 0.1737034574468085, "grad_norm": 126.90697398168803, "learning_rate": 1.9954399220378377e-07, "loss": 0.9028, "step": 1045, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43137254901960786, "success_rate.epoch.env.logic": 0.35443037974683544, "success_rate.epoch.env.math": 0.8188976377952756, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.6097165991902834, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5008617176497331, "success_rate.epoch.global": 0.5703971119133574, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9917441291585127, "tokens_p.mean_in_band": 0.6861979166666666, "tokens_rate.above_band": 0.9659735349716446, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034026465028355386 }, { "epoch": 0.1745345744680851, "grad_norm": 248.92500058424363, "learning_rate": 1.9953831773948058e-07, "loss": 1.0204, "step": 1050, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43137254901960786, "success_rate.epoch.env.logic": 0.35714285714285715, "success_rate.epoch.env.math": 0.8196078431372549, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.6095008051529791, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.50115325298616, "success_rate.epoch.global": 0.5708418891170431, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9915865384615384, "tokens_p.mean_in_band": 0.615234375, "tokens_rate.above_band": 0.8904109589041096, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1095890410958904 }, { "epoch": 0.1753656914893617, "grad_norm": 162.11820232870596, "learning_rate": 1.9953260859945009e-07, "loss": 1.0052, "step": 1055, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4230769230769231, "success_rate.epoch.env.logic": 0.35564853556485354, "success_rate.epoch.env.math": 0.8178294573643411, "success_rate.epoch.env.sat": 0.06097560975609756, "success_rate.epoch.env.science": 0.610128617363344, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5000902251326266, "success_rate.epoch.global": 0.5705521472392638, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.2222222222222222, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.993865948533812, "tokens_p.mean_in_band": 0.5909261067708333, "tokens_rate.above_band": 0.8744113029827315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12558869701726844 }, { "epoch": 0.1761968085106383, "grad_norm": 140.1974730223309, "learning_rate": 1.995268647938246e-07, "loss": 0.8321, "step": 1060, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4230769230769231, "success_rate.epoch.env.logic": 0.36099585062240663, "success_rate.epoch.env.math": 0.8185328185328186, "success_rate.epoch.env.sat": 0.06097560975609756, "success_rate.epoch.env.science": 0.6108887109687751, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5007093860263958, "success_rate.epoch.global": 0.5717922606924644, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.991822429906542, "tokens_p.mean_in_band": 0.6958451704545454, "tokens_rate.above_band": 0.9067796610169492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09322033898305085 }, { "epoch": 0.1770279255319149, "grad_norm": 642.0174638685222, "learning_rate": 1.9952108633279797e-07, "loss": 0.8895, "step": 1065, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4230769230769231, "success_rate.epoch.env.logic": 0.36363636363636365, "success_rate.epoch.env.math": 0.8192307692307692, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.6090764331210191, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5007813442048936, "success_rate.epoch.global": 0.5709219858156028, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.2857142857142857, "success_rate.window.env_macro_mean": 0.5714285714285714, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.994092039800995, "tokens_p.mean_below_band": 3.510081114654895e-12, "tokens_p.mean_in_band": 0.5476310483870968, "tokens_rate.above_band": 0.8626609442060086, "tokens_rate.below_band": 0.004291845493562232, "tokens_rate.in_band": 0.13304721030042918 }, { "epoch": 0.1778590425531915, "grad_norm": 265.81020007173316, "learning_rate": 1.9951527322662555e-07, "loss": 0.9334, "step": 1070, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4230769230769231, "success_rate.epoch.env.logic": 0.35918367346938773, "success_rate.epoch.env.math": 0.8192307692307692, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.6098334655035687, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5004453753154002, "success_rate.epoch.global": 0.570635721493441, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.2222222222222222, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9998149792776792, "tokens_p.mean_in_band": 0.562548225308642, "tokens_rate.above_band": 0.9542372881355933, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04576271186440678 }, { "epoch": 0.17869015957446807, "grad_norm": 226.09530519572337, "learning_rate": 1.9950942548562418e-07, "loss": 1.1182, "step": 1075, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4230769230769231, "success_rate.epoch.env.logic": 0.3603238866396761, "success_rate.epoch.env.math": 0.8212927756653993, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.6097946287519748, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5007329555747935, "success_rate.epoch.global": 0.571285140562249, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.7000000000000001, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977156432748538, "tokens_p.mean_in_band": 0.5841238839285714, "tokens_rate.above_band": 0.9513212795549374, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.048678720445062586 }, { "epoch": 0.17952127659574468, "grad_norm": 264.55522665586335, "learning_rate": 1.9950354312017216e-07, "loss": 0.9752, "step": 1080, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39285714285714285, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.41509433962264153, "success_rate.epoch.env.logic": 0.3603238866396761, "success_rate.epoch.env.math": 0.8212927756653993, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.6092767295597484, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49931083377576024, "success_rate.epoch.global": 0.5705, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.125, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9958608989223844, "tokens_p.mean_below_band": 2.169981598854065e-07, "tokens_p.mean_in_band": 0.5555615011415526, "tokens_rate.above_band": 0.9172922922922923, "tokens_rate.below_band": 0.0005005005005005005, "tokens_rate.in_band": 0.08220720720720721 }, { "epoch": 0.18035239361702127, "grad_norm": 460.79970668050754, "learning_rate": 1.9949762614070927e-07, "loss": 0.9209, "step": 1085, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39285714285714285, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.41509433962264153, "success_rate.epoch.env.logic": 0.3629032258064516, "success_rate.epoch.env.math": 0.821969696969697, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.6095461658841941, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49963135166626194, "success_rate.epoch.global": 0.5712151394422311, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.7380952380952381, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9987753991291727, "tokens_p.mean_in_band": 0.6100983796296297, "tokens_rate.above_band": 0.9622905027932961, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03770949720670391 }, { "epoch": 0.18118351063829788, "grad_norm": 178.41552085802275, "learning_rate": 1.9949167455773669e-07, "loss": 0.9215, "step": 1090, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39285714285714285, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.41509433962264153, "success_rate.epoch.env.logic": 0.364, "success_rate.epoch.env.math": 0.8226415094339623, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.6107644305772231, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4998376877589193, "success_rate.epoch.global": 0.5719246031746031, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9952598314606742, "tokens_p.mean_below_band": 9.255018085241318e-09, "tokens_p.mean_in_band": 0.6237847222222223, "tokens_rate.above_band": 0.9393139841688655, "tokens_rate.below_band": 0.0013192612137203166, "tokens_rate.in_band": 0.059366754617414245 }, { "epoch": 0.18201462765957446, "grad_norm": 219.7736325707439, "learning_rate": 1.9948568838181696e-07, "loss": 1.0282, "step": 1095, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39285714285714285, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.41509433962264153, "success_rate.epoch.env.logic": 0.36363636363636365, "success_rate.epoch.env.math": 0.8226415094339623, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.609472049689441, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49968714073606313, "success_rate.epoch.global": 0.5708641975308641, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.3333333333333333, "tokens_p.mean_above_band": 0.9967320261437909, "tokens_p.mean_in_band": 0.5900735294117647, "tokens_rate.above_band": 0.9375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0625 }, { "epoch": 0.18284574468085107, "grad_norm": 224.39910614315582, "learning_rate": 1.994796676235742e-07, "loss": 1.0531, "step": 1100, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39655172413793105, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.41509433962264153, "success_rate.epoch.env.logic": 0.36220472440944884, "success_rate.epoch.env.math": 0.8226415094339623, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.6102088167053364, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49995984156058754, "success_rate.epoch.global": 0.5710772257747172, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.29166666666666663, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9987969201154957, "tokens_p.mean_in_band": 0.5738044507575758, "tokens_rate.above_band": 0.9402714932126697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05972850678733032 }, { "epoch": 0.18367686170212766, "grad_norm": 266.2200682538834, "learning_rate": 1.9947361229369366e-07, "loss": 0.8472, "step": 1105, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39655172413793105, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.41509433962264153, "success_rate.epoch.env.logic": 0.3607843137254902, "success_rate.epoch.env.math": 0.8233082706766918, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.611837048424289, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5000393490403806, "success_rate.epoch.global": 0.5721977484092021, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999702380952381, "tokens_p.mean_below_band": 1.5688783605583012e-11, "tokens_p.mean_in_band": 0.6096643518518519, "tokens_rate.above_band": 0.9574468085106383, "tokens_rate.below_band": 0.001519756838905775, "tokens_rate.in_band": 0.041033434650455926 }, { "epoch": 0.18450797872340424, "grad_norm": 169.38340095825396, "learning_rate": 1.9946752240292212e-07, "loss": 0.9755, "step": 1110, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.41509433962264153, "success_rate.epoch.env.logic": 0.3607843137254902, "success_rate.epoch.env.math": 0.8252788104089219, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.6125574272588055, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4996729583952343, "success_rate.epoch.global": 0.5730994152046783, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977379356568364, "tokens_p.mean_in_band": 0.51043701171875, "tokens_rate.above_band": 0.9209876543209876, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07901234567901234 }, { "epoch": 0.18533909574468085, "grad_norm": 152.13962411654154, "learning_rate": 1.9946139796206767e-07, "loss": 0.8007, "step": 1115, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.41509433962264153, "success_rate.epoch.env.logic": 0.35797665369649806, "success_rate.epoch.env.math": 0.8252788104089219, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.6137404580152672, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4995252648250043, "success_rate.epoch.global": 0.5733722060252673, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9983397016011645, "tokens_p.mean_in_band": 0.6006634424603174, "tokens_rate.above_band": 0.9561586638830898, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04384133611691023 }, { "epoch": 0.18617021276595744, "grad_norm": 439.98687769966773, "learning_rate": 1.9945523898199963e-07, "loss": 1.0224, "step": 1120, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42592592592592593, "success_rate.epoch.env.logic": 0.35658914728682173, "success_rate.epoch.env.math": 0.8252788104089219, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.6135155656795748, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5003633727848148, "success_rate.epoch.global": 0.5732946298984035, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9929454851104706, "tokens_p.mean_in_band": 0.5964936755952381, "tokens_rate.above_band": 0.961218836565097, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038781163434903045 }, { "epoch": 0.18700132978723405, "grad_norm": 442.11912082659296, "learning_rate": 1.9944904547364873e-07, "loss": 0.9086, "step": 1125, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42592592592592593, "success_rate.epoch.env.logic": 0.35658914728682173, "success_rate.epoch.env.math": 0.8252788104089219, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.6131221719457014, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5003276097180991, "success_rate.epoch.global": 0.5732177263969171, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5555555555555556, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.986204954954955, "tokens_p.mean_in_band": 0.4508272058823529, "tokens_rate.above_band": 0.7655172413793103, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.23448275862068965 }, { "epoch": 0.18783244680851063, "grad_norm": 120.69277736939155, "learning_rate": 1.9944281744800692e-07, "loss": 0.8571, "step": 1130, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42592592592592593, "success_rate.epoch.env.logic": 0.3576923076923077, "success_rate.epoch.env.math": 0.8252788104089219, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.6133633633633634, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5004498235202034, "success_rate.epoch.global": 0.5734165067178503, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 1.0001788268955651, "tokens_p.mean_below_band": 1.2278178473934531e-11, "tokens_p.mean_in_band": 0.5243626644736842, "tokens_rate.above_band": 0.9728601252609603, "tokens_rate.below_band": 0.0006958942240779402, "tokens_rate.in_band": 0.026443980514961725 }, { "epoch": 0.18866356382978725, "grad_norm": 542.5901995373054, "learning_rate": 1.9943655491612742e-07, "loss": 1.1015, "step": 1135, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.42592592592592593, "success_rate.epoch.env.logic": 0.3574144486692015, "success_rate.epoch.env.math": 0.8252788104089219, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.6123973114264376, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.500336740705655, "success_rate.epoch.global": 0.5725883476599809, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.38095238095238093, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9929191842900302, "tokens_p.mean_in_band": 0.6369047619047619, "tokens_rate.above_band": 0.8873994638069705, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1126005361930295 }, { "epoch": 0.18949468085106383, "grad_norm": 269.63365533118093, "learning_rate": 1.9943025788912467e-07, "loss": 1.1228, "step": 1140, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.3574144486692015, "success_rate.epoch.env.math": 0.825925925925926, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.6136701337295691, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5014601632745501, "success_rate.epoch.global": 0.5739419876367094, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965156794425087, "tokens_p.mean_in_band": 0.6739211309523809, "tokens_rate.above_band": 0.9318181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06818181818181818 }, { "epoch": 0.1903257978723404, "grad_norm": 143.92137110226676, "learning_rate": 1.9942392637817437e-07, "loss": 0.9558, "step": 1145, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.36466165413533835, "success_rate.epoch.env.math": 0.825925925925926, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.613905325443787, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5021403812000369, "success_rate.epoch.global": 0.5748106060606061, "success_rate.window.env.logic": 0.75, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7083333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9985868863049095, "tokens_p.mean_in_band": 0.5771205357142857, "tokens_rate.above_band": 0.9567367119901112, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04326328800988875 }, { "epoch": 0.19115691489361702, "grad_norm": 187.30544503821565, "learning_rate": 1.9941756039451342e-07, "loss": 0.9217, "step": 1150, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.36704119850187267, "success_rate.epoch.env.math": 0.8278388278388278, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.6143067846607669, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5025034380531616, "success_rate.epoch.global": 0.5756718528995757, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9932544052863436, "tokens_p.mean_in_band": 0.7085433467741935, "tokens_rate.above_band": 0.8798449612403101, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12015503875968993 }, { "epoch": 0.1919880319148936, "grad_norm": 1507.552530199978, "learning_rate": 1.9941115994943982e-07, "loss": 0.9314, "step": 1155, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.3898305084745763, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.36704119850187267, "success_rate.epoch.env.math": 0.8284671532846716, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.6158357771260997, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5026995578632686, "success_rate.epoch.global": 0.5769953051643193, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9916857798165137, "tokens_p.mean_in_band": 0.6243489583333334, "tokens_rate.above_band": 0.8582677165354331, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14173228346456693 }, { "epoch": 0.19281914893617022, "grad_norm": 340.14798639891603, "learning_rate": 1.9940472505431285e-07, "loss": 0.9579, "step": 1160, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39344262295081966, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.36704119850187267, "success_rate.epoch.env.math": 0.8290909090909091, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.6167883211678832, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5031712318927472, "success_rate.epoch.global": 0.5778401122019635, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.7380952380952381, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.998371138996139, "tokens_p.mean_in_band": 0.6483623798076923, "tokens_rate.above_band": 0.975517890772128, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02448210922787194 }, { "epoch": 0.1936502659574468, "grad_norm": 251.58210047327617, "learning_rate": 1.9939825572055284e-07, "loss": 1.0158, "step": 1165, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.39344262295081966, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.3643122676579926, "success_rate.epoch.env.math": 0.8297101449275363, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.6167272727272727, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5029738915793049, "success_rate.epoch.global": 0.5775500698649279, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.611111111111111, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9965024330900243, "tokens_p.mean_in_band": 0.5450114678899083, "tokens_rate.above_band": 0.9187779433681073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08122205663189269 }, { "epoch": 0.19448138297872342, "grad_norm": 166.83858758307767, "learning_rate": 1.9939175195964128e-07, "loss": 0.7169, "step": 1170, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4032258064516129, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.3643122676579926, "success_rate.epoch.env.math": 0.8315412186379928, "success_rate.epoch.env.sat": 0.05747126436781609, "success_rate.epoch.env.science": 0.6171138506163887, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5039419434026157, "success_rate.epoch.global": 0.5781177561427909, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9959446564885496, "tokens_p.mean_in_band": 0.6717862215909091, "tokens_rate.above_band": 0.8993135011441648, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10068649885583524 }, { "epoch": 0.1953125, "grad_norm": 668.3259426053454, "learning_rate": 1.9938521378312078e-07, "loss": 0.7603, "step": 1175, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4032258064516129, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.36666666666666664, "success_rate.epoch.env.math": 0.8291814946619217, "success_rate.epoch.env.sat": 0.05747126436781609, "success_rate.epoch.env.science": 0.6177745664739884, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5040015243926341, "success_rate.epoch.global": 0.5787528868360278, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9927884615384616, "tokens_p.mean_in_band": 0.6216216216216216, "tokens_rate.above_band": 0.7944444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.20555555555555555 }, { "epoch": 0.19614361702127658, "grad_norm": 224.9222325548951, "learning_rate": 1.9937864120259492e-07, "loss": 0.9413, "step": 1180, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4032258064516129, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.36531365313653136, "success_rate.epoch.env.math": 0.8309859154929577, "success_rate.epoch.env.sat": 0.05747126436781609, "success_rate.epoch.env.science": 0.619150467962563, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5041676433734955, "success_rate.epoch.global": 0.5800367985280589, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9911404639175257, "tokens_p.mean_in_band": 0.6908735795454546, "tokens_rate.above_band": 0.9463414634146341, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05365853658536585 }, { "epoch": 0.1969747340425532, "grad_norm": 182.2938333894031, "learning_rate": 1.9937203422972855e-07, "loss": 0.8178, "step": 1185, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4032258064516129, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.3639705882352941, "success_rate.epoch.env.math": 0.8315789473684211, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.6205164992826399, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5041642720775562, "success_rate.epoch.global": 0.580659945004583, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9918488777089783, "tokens_p.mean_in_band": 0.5329213876705653, "tokens_rate.above_band": 0.8831168831168831, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11688311688311688 }, { "epoch": 0.19780585106382978, "grad_norm": 270.4151080623194, "learning_rate": 1.9936539287624733e-07, "loss": 0.9748, "step": 1190, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4126984126984127, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.3639705882352941, "success_rate.epoch.env.math": 0.8327526132404182, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.622412562455389, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5053044843767878, "success_rate.epoch.global": 0.5825729927007299, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959124472573839, "tokens_p.mean_in_band": 0.6805098684210527, "tokens_rate.above_band": 0.92578125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07421875 }, { "epoch": 0.1986369680851064, "grad_norm": 187.44005324716238, "learning_rate": 1.9935871715393803e-07, "loss": 0.7685, "step": 1195, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4126984126984127, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.36496350364963503, "success_rate.epoch.env.math": 0.8327526132404182, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.6228693181818182, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5054362726623123, "success_rate.epoch.global": 0.582916855974557, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977438556067588, "tokens_p.mean_in_band": 0.527294921875, "tokens_rate.above_band": 0.9421128798842258, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05788712011577424 }, { "epoch": 0.19946808510638298, "grad_norm": 173.36894907611784, "learning_rate": 1.9935200707464848e-07, "loss": 0.9895, "step": 1200, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4126984126984127, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.36363636363636365, "success_rate.epoch.env.math": 0.8339100346020761, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.6242038216560509, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5055421621916414, "success_rate.epoch.global": 0.583974649162517, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961963660245184, "tokens_p.mean_in_band": 0.5931165540540541, "tokens_rate.above_band": 0.9391447368421053, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06085526315789474 }, { "epoch": 0.2002992021276596, "grad_norm": 162.61905989296628, "learning_rate": 1.993452626502874e-07, "loss": 0.7882, "step": 1205, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4126984126984127, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43636363636363634, "success_rate.epoch.env.logic": 0.36363636363636365, "success_rate.epoch.env.math": 0.8350515463917526, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.6250880902043693, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49663541494964086, "success_rate.epoch.global": 0.584761045987376, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9899193548387096, "tokens_p.mean_in_band": 0.60546875, "tokens_rate.above_band": 0.84765625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15234375 }, { "epoch": 0.20113031914893617, "grad_norm": 214.75697790751957, "learning_rate": 1.993384838928245e-07, "loss": 1.2434, "step": 1210, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4126984126984127, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.36363636363636365, "success_rate.epoch.env.math": 0.8350515463917526, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.6248246844319776, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4975264630671448, "success_rate.epoch.global": 0.5849056603773585, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9907029750479847, "tokens_p.mean_in_band": 0.6961300872093024, "tokens_rate.above_band": 0.9237588652482269, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07624113475177305 }, { "epoch": 0.20196143617021275, "grad_norm": 174.26995888180224, "learning_rate": 1.9933167081429038e-07, "loss": 0.7754, "step": 1215, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4126984126984127, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.36594202898550726, "success_rate.epoch.env.math": 0.8333333333333334, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.6261355695317959, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4976990391935577, "success_rate.epoch.global": 0.5861297539149888, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9889787946428571, "tokens_p.mean_in_band": 0.7231026785714286, "tokens_rate.above_band": 0.8648648648648649, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13513513513513514 }, { "epoch": 0.20279255319148937, "grad_norm": 492.1817572691923, "learning_rate": 1.993248234267766e-07, "loss": 1.1205, "step": 1220, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4126984126984127, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.36594202898550726, "success_rate.epoch.env.math": 0.8338983050847457, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.6256089074460682, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4977025218904382, "success_rate.epoch.global": 0.5860838537020517, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9901960784313726, "tokens_p.mean_in_band": 0.6036458333333333, "tokens_rate.above_band": 0.7727272727272727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.22727272727272727 }, { "epoch": 0.20362367021276595, "grad_norm": 271.5292273195168, "learning_rate": 1.9931794174243564e-07, "loss": 1.0452, "step": 1225, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.40625, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.36594202898550726, "success_rate.epoch.env.math": 0.831081081081081, "success_rate.epoch.env.sat": 0.056179775280898875, "success_rate.epoch.env.science": 0.6257796257796258, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49681767417172884, "success_rate.epoch.global": 0.5855175477565526, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.17857142857142858, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9943181818181818, "tokens_p.mean_in_band": 0.6185360863095238, "tokens_rate.above_band": 0.8719512195121951, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12804878048780488 }, { "epoch": 0.20445478723404256, "grad_norm": 136.54441249915374, "learning_rate": 1.9931102577348075e-07, "loss": 0.9196, "step": 1230, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.36462093862815886, "success_rate.epoch.env.math": 0.831081081081081, "success_rate.epoch.env.sat": 0.05555555555555555, "success_rate.epoch.env.science": 0.6258644536652835, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4969068038893539, "success_rate.epoch.global": 0.5850310008857396, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9966831841432225, "tokens_p.mean_in_band": 0.6754705255681818, "tokens_rate.above_band": 0.9467312348668281, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053268765133171914 }, { "epoch": 0.20528590425531915, "grad_norm": 347.9254341197953, "learning_rate": 1.9930407553218612e-07, "loss": 1.0113, "step": 1235, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.36200716845878134, "success_rate.epoch.env.math": 0.8294314381270903, "success_rate.epoch.env.sat": 0.054945054945054944, "success_rate.epoch.env.science": 0.6259489302967564, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4964714005164088, "success_rate.epoch.global": 0.5844728716365241, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.29166666666666663, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9984069668649108, "tokens_p.mean_below_band": 3.1650415621697903e-10, "tokens_p.mean_in_band": 0.5015914351851852, "tokens_rate.above_band": 0.9348689436060366, "tokens_rate.below_band": 0.0007942811755361397, "tokens_rate.in_band": 0.06433677521842732 }, { "epoch": 0.20611702127659576, "grad_norm": 166.51054372529995, "learning_rate": 1.9929709103088669e-07, "loss": 0.9434, "step": 1240, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45614035087719296, "success_rate.epoch.env.logic": 0.36200716845878134, "success_rate.epoch.env.math": 0.83, "success_rate.epoch.env.sat": 0.054945054945054944, "success_rate.epoch.env.science": 0.6247422680412371, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4972962804315007, "success_rate.epoch.global": 0.5841758241758241, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9945047669491526, "tokens_p.mean_in_band": 0.5641276041666666, "tokens_rate.above_band": 0.9291338582677166, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07086614173228346 }, { "epoch": 0.20694813829787234, "grad_norm": 225.06886360968747, "learning_rate": 1.9929007228197822e-07, "loss": 0.8741, "step": 1245, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45614035087719296, "success_rate.epoch.env.logic": 0.36200716845878134, "success_rate.epoch.env.math": 0.8305647840531561, "success_rate.epoch.env.sat": 0.054945054945054944, "success_rate.epoch.env.science": 0.6247436773752563, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4973477525576076, "success_rate.epoch.global": 0.5845008756567426, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5555555555555556, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9834947183098591, "tokens_p.mean_below_band": 4.1443854570388794e-08, "tokens_p.mean_in_band": 0.6045572916666667, "tokens_rate.above_band": 0.8208092485549133, "tokens_rate.below_band": 0.005780346820809248, "tokens_rate.in_band": 0.17341040462427745 }, { "epoch": 0.20777925531914893, "grad_norm": 155.03719314404663, "learning_rate": 1.9928301929791736e-07, "loss": 1.0002, "step": 1250, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45614035087719296, "success_rate.epoch.env.logic": 0.36200716845878134, "success_rate.epoch.env.math": 0.8327868852459016, "success_rate.epoch.env.sat": 0.054945054945054944, "success_rate.epoch.env.science": 0.6232970027247956, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4974182458796335, "success_rate.epoch.global": 0.5843872655909289, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.2, "success_rate.window.env_macro_mean": 0.7333333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9945463988919667, "tokens_p.mean_below_band": 2.342858351767063e-09, "tokens_p.mean_in_band": 0.576171875, "tokens_rate.above_band": 0.9304123711340206, "tokens_rate.below_band": 0.002577319587628866, "tokens_rate.in_band": 0.06701030927835051 }, { "epoch": 0.20861037234042554, "grad_norm": 167.2013906845457, "learning_rate": 1.9927593209122133e-07, "loss": 0.8269, "step": 1255, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45614035087719296, "success_rate.epoch.env.logic": 0.36298932384341637, "success_rate.epoch.env.math": 0.8349514563106796, "success_rate.epoch.env.sat": 0.054945054945054944, "success_rate.epoch.env.science": 0.6240652617267165, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4985759298906898, "success_rate.epoch.global": 0.5857577073382545, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975234619395204, "tokens_p.mean_in_band": 0.622750946969697, "tokens_rate.above_band": 0.9667338709677419, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03326612903225806 }, { "epoch": 0.20944148936170212, "grad_norm": 151.13123156120764, "learning_rate": 1.9926881067446826e-07, "loss": 0.8284, "step": 1260, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45614035087719296, "success_rate.epoch.env.logic": 0.3674911660777385, "success_rate.epoch.env.math": 0.8360128617363344, "success_rate.epoch.env.sat": 0.05434782608695652, "success_rate.epoch.env.science": 0.6239837398373984, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49901997506455886, "success_rate.epoch.global": 0.5862516212710766, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.65, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9899038461538462, "tokens_p.mean_in_band": 0.6902629573170732, "tokens_rate.above_band": 0.8879781420765027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11202185792349727 }, { "epoch": 0.21027260638297873, "grad_norm": 220.56530840404304, "learning_rate": 1.9926165506029685e-07, "loss": 1.1713, "step": 1265, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45614035087719296, "success_rate.epoch.env.logic": 0.3674911660777385, "success_rate.epoch.env.math": 0.8338658146964856, "success_rate.epoch.env.sat": 0.053763440860215055, "success_rate.epoch.env.science": 0.6239027683997299, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4987643020005354, "success_rate.epoch.global": 0.5859543300301594, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9901315789473685, "tokens_p.mean_in_band": 0.6073069852941176, "tokens_rate.above_band": 0.8172043010752689, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1827956989247312 }, { "epoch": 0.21110372340425532, "grad_norm": 313.2592446435684, "learning_rate": 1.9925446526140663e-07, "loss": 1.0604, "step": 1270, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46551724137931033, "success_rate.epoch.env.logic": 0.3674911660777385, "success_rate.epoch.env.math": 0.8322784810126582, "success_rate.epoch.env.sat": 0.053763440860215055, "success_rate.epoch.env.science": 0.6238223418573351, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49946513202561676, "success_rate.epoch.global": 0.5862660944206008, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9954490291262136, "tokens_p.mean_in_band": 0.5971258254278273, "tokens_rate.above_band": 0.9363636363636364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06363636363636363 }, { "epoch": 0.21193484042553193, "grad_norm": 280.80643101910096, "learning_rate": 1.992472412905577e-07, "loss": 0.8185, "step": 1275, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46551724137931033, "success_rate.epoch.env.logic": 0.3674911660777385, "success_rate.epoch.env.math": 0.8322784810126582, "success_rate.epoch.env.sat": 0.05319148936170213, "success_rate.epoch.env.science": 0.6239946380697051, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4994287997268765, "success_rate.epoch.global": 0.5862216516902011, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9968988549618321, "tokens_p.mean_in_band": 0.6690705128205128, "tokens_rate.above_band": 0.9307282415630551, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06927175843694494 }, { "epoch": 0.2127659574468085, "grad_norm": 220.10210274905896, "learning_rate": 1.992399831605708e-07, "loss": 1.0441, "step": 1280, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46551724137931033, "success_rate.epoch.env.logic": 0.3674911660777385, "success_rate.epoch.env.math": 0.8333333333333334, "success_rate.epoch.env.sat": 0.05319148936170213, "success_rate.epoch.env.science": 0.6230820547031354, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49944173326815877, "success_rate.epoch.global": 0.5861040068201193, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.375, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9864738805970149, "tokens_p.mean_in_band": 0.60931396484375, "tokens_rate.above_band": 0.7701149425287356, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.22988505747126436 }, { "epoch": 0.2135970744680851, "grad_norm": 138.861396860042, "learning_rate": 1.9923269088432745e-07, "loss": 0.7988, "step": 1285, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46551724137931033, "success_rate.epoch.env.logic": 0.36713286713286714, "success_rate.epoch.env.math": 0.83125, "success_rate.epoch.env.sat": 0.05319148936170213, "success_rate.epoch.env.science": 0.6240851630073186, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4993109583614295, "success_rate.epoch.global": 0.586411889596603, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.611111111111111, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977506426735219, "tokens_p.mean_in_band": 0.6450095663265306, "tokens_rate.above_band": 0.9407496977025392, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0592503022974607 }, { "epoch": 0.2144281914893617, "grad_norm": 277.11644678655375, "learning_rate": 1.9922536447476952e-07, "loss": 0.875, "step": 1290, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4576271186440678, "success_rate.epoch.env.logic": 0.36713286713286714, "success_rate.epoch.env.math": 0.8312883435582822, "success_rate.epoch.env.sat": 0.05319148936170213, "success_rate.epoch.env.science": 0.6241699867197875, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4986048715010212, "success_rate.epoch.global": 0.586892177589852, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 0.8333333333333334, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9893594457013575, "tokens_p.mean_in_band": 0.6767982219827586, "tokens_rate.above_band": 0.859086491739553, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14091350826044705 }, { "epoch": 0.2152593085106383, "grad_norm": 221.7188818829395, "learning_rate": 1.9921800394489965e-07, "loss": 0.907, "step": 1295, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.36713286713286714, "success_rate.epoch.env.math": 0.8318042813455657, "success_rate.epoch.env.sat": 0.05263157894736842, "success_rate.epoch.env.science": 0.6242544731610338, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4994303316680026, "success_rate.epoch.global": 0.5870940531421341, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6166666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9959917971662938, "tokens_p.mean_in_band": 0.6155225409836066, "tokens_rate.above_band": 0.956490727532097, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043509272467902996 }, { "epoch": 0.2160904255319149, "grad_norm": 229.3978572403193, "learning_rate": 1.99210609307781e-07, "loss": 1.085, "step": 1300, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.36713286713286714, "success_rate.epoch.env.math": 0.8323170731707317, "success_rate.epoch.env.sat": 0.05263157894736842, "success_rate.epoch.env.science": 0.6249176005273567, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4995372334126834, "success_rate.epoch.global": 0.5878151260504202, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9894911504424779, "tokens_p.mean_below_band": 1.126900315284729e-07, "tokens_p.mean_in_band": 0.72314453125, "tokens_rate.above_band": 0.8692307692307693, "tokens_rate.below_band": 0.007692307692307693, "tokens_rate.in_band": 0.12307692307692308 }, { "epoch": 0.2169215425531915, "grad_norm": 163.0903824275352, "learning_rate": 1.992031805765372e-07, "loss": 1.0955, "step": 1305, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.36585365853658536, "success_rate.epoch.env.math": 0.8308157099697885, "success_rate.epoch.env.sat": 0.052083333333333336, "success_rate.epoch.env.science": 0.6259040105193951, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49932428728366324, "success_rate.epoch.global": 0.5881121808287987, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.3666666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.994140625, "tokens_p.mean_below_band": 8.149072527885437e-09, "tokens_p.mean_in_band": 0.6324637276785714, "tokens_rate.above_band": 0.8585365853658536, "tokens_rate.below_band": 0.004878048780487805, "tokens_rate.in_band": 0.13658536585365855 }, { "epoch": 0.21775265957446807, "grad_norm": 384.7668506257618, "learning_rate": 1.9919571776435243e-07, "loss": 0.9975, "step": 1310, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.36585365853658536, "success_rate.epoch.env.math": 0.8308157099697885, "success_rate.epoch.env.sat": 0.052083333333333336, "success_rate.epoch.env.science": 0.6254901960784314, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49928666778903014, "success_rate.epoch.global": 0.5879899916597164, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9840856481481481, "tokens_p.mean_in_band": 0.6072916666666667, "tokens_rate.above_band": 0.782608695652174, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21739130434782608 }, { "epoch": 0.21858377659574468, "grad_norm": 158.06117561136978, "learning_rate": 1.9918822088447138e-07, "loss": 0.8129, "step": 1315, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.3689655172413793, "success_rate.epoch.env.math": 0.8308157099697885, "success_rate.epoch.env.sat": 0.052083333333333336, "success_rate.epoch.env.science": 0.6267100977198697, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4996804641841422, "success_rate.epoch.global": 0.5889443059019119, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7619047619047619, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9909511784511784, "tokens_p.mean_in_band": 0.7170138888888888, "tokens_rate.above_band": 0.9428571428571428, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05714285714285714 }, { "epoch": 0.21941489361702127, "grad_norm": 279.1660130433892, "learning_rate": 1.9918068995019918e-07, "loss": 0.9803, "step": 1320, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.3689655172413793, "success_rate.epoch.env.math": 0.8313253012048193, "success_rate.epoch.env.sat": 0.052083333333333336, "success_rate.epoch.env.science": 0.626701231367466, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49972598462801743, "success_rate.epoch.global": 0.5892339544513457, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.986572265625, "tokens_p.mean_in_band": 0.5660714285714286, "tokens_rate.above_band": 0.7852760736196319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2147239263803681 }, { "epoch": 0.22024601063829788, "grad_norm": 280.84577768197113, "learning_rate": 1.9917312497490136e-07, "loss": 0.8061, "step": 1325, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.3689655172413793, "success_rate.epoch.env.math": 0.8293413173652695, "success_rate.epoch.env.sat": 0.05154639175257732, "success_rate.epoch.env.science": 0.6268560361523564, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4995108827520706, "success_rate.epoch.global": 0.5891089108910891, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9893790849673203, "tokens_p.mean_below_band": 2.514570951461792e-08, "tokens_p.mean_in_band": 0.6851806640625, "tokens_rate.above_band": 0.8225806451612904, "tokens_rate.below_band": 0.005376344086021506, "tokens_rate.in_band": 0.17204301075268819 }, { "epoch": 0.22107712765957446, "grad_norm": 137.6809814754868, "learning_rate": 1.991655259720039e-07, "loss": 0.7748, "step": 1330, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.36860068259385664, "success_rate.epoch.env.math": 0.8293413173652695, "success_rate.epoch.env.sat": 0.05154639175257732, "success_rate.epoch.env.science": 0.6265292981326465, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49944801250959486, "success_rate.epoch.global": 0.5886466474701769, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.48333333333333334, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 1.0004780595813205, "tokens_p.mean_in_band": 0.6159420289855072, "tokens_rate.above_band": 0.9473684210526315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05263157894736842 }, { "epoch": 0.22190824468085107, "grad_norm": 322.99453023291244, "learning_rate": 1.9915789295499317e-07, "loss": 0.9512, "step": 1335, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47540983606557374, "success_rate.epoch.env.logic": 0.36860068259385664, "success_rate.epoch.env.math": 0.8293413173652695, "success_rate.epoch.env.sat": 0.05154639175257732, "success_rate.epoch.env.science": 0.6277278562259306, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5003518059179759, "success_rate.epoch.global": 0.5896594173163726, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963149566955363, "tokens_p.mean_in_band": 0.6804596656976745, "tokens_rate.above_band": 0.9721502590673575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027849740932642485 }, { "epoch": 0.22273936170212766, "grad_norm": 227.33008092202485, "learning_rate": 1.9915022593741594e-07, "loss": 0.958, "step": 1340, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47540983606557374, "success_rate.epoch.env.logic": 0.3728813559322034, "success_rate.epoch.env.math": 0.8293413173652695, "success_rate.epoch.env.sat": 0.05154639175257732, "success_rate.epoch.env.science": 0.6282789507357646, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5007910575405378, "success_rate.epoch.global": 0.5904255319148937, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9895334928229665, "tokens_p.mean_in_band": 0.6583180147058824, "tokens_rate.above_band": 0.9247787610619469, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0752212389380531 }, { "epoch": 0.22357047872340424, "grad_norm": 336.850707124167, "learning_rate": 1.9914252493287925e-07, "loss": 0.8684, "step": 1345, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47540983606557374, "success_rate.epoch.env.logic": 0.3728813559322034, "success_rate.epoch.env.math": 0.827893175074184, "success_rate.epoch.env.sat": 0.05102040816326531, "success_rate.epoch.env.science": 0.6279514996809189, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5005818232736975, "success_rate.epoch.global": 0.5901305057096248, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9941275167785235, "tokens_p.mean_in_band": 0.63125, "tokens_rate.above_band": 0.8097826086956522, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19021739130434784 }, { "epoch": 0.22440159574468085, "grad_norm": 122.73715385934305, "learning_rate": 1.9913478995505052e-07, "loss": 0.8879, "step": 1350, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.417910447761194, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47540983606557374, "success_rate.epoch.env.logic": 0.3728813559322034, "success_rate.epoch.env.math": 0.8259587020648967, "success_rate.epoch.env.sat": 0.050505050505050504, "success_rate.epoch.env.science": 0.6296060991105463, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5005095295247998, "success_rate.epoch.global": 0.590982940698619, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9884259259259259, "tokens_p.mean_in_band": 0.6495414402173914, "tokens_rate.above_band": 0.8756756756756757, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12432432432432433 }, { "epoch": 0.22523271276595744, "grad_norm": 169.04876506860631, "learning_rate": 1.9912702101765744e-07, "loss": 0.8978, "step": 1355, "success_rate.epoch.env.abd": 0.4, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47540983606557374, "success_rate.epoch.env.logic": 0.3716216216216216, "success_rate.epoch.env.math": 0.8264705882352941, "success_rate.epoch.env.sat": 0.050505050505050504, "success_rate.epoch.env.science": 0.629512349588347, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4998743168392339, "success_rate.epoch.global": 0.5906882591093118, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.39285714285714285, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9970903869407497, "tokens_p.mean_in_band": 0.5697021484375, "tokens_rate.above_band": 0.9451428571428572, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054857142857142854 }, { "epoch": 0.22606382978723405, "grad_norm": 196.81713865618812, "learning_rate": 1.9911921813448794e-07, "loss": 0.8737, "step": 1360, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47540983606557374, "success_rate.epoch.env.logic": 0.3745819397993311, "success_rate.epoch.env.math": 0.8264705882352941, "success_rate.epoch.env.sat": 0.050505050505050504, "success_rate.epoch.env.science": 0.6306818181818182, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49797702472752303, "success_rate.epoch.global": 0.591367486889875, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975503177966102, "tokens_p.mean_in_band": 0.36855394486692017, "tokens_rate.above_band": 0.7821043910521955, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21789560894780446 }, { "epoch": 0.22689494680851063, "grad_norm": 326.3222385000378, "learning_rate": 1.9911138131939026e-07, "loss": 0.9285, "step": 1365, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47540983606557374, "success_rate.epoch.env.logic": 0.37333333333333335, "success_rate.epoch.env.math": 0.8264705882352941, "success_rate.epoch.env.sat": 0.05, "success_rate.epoch.env.science": 0.6316781898177247, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49790818060614656, "success_rate.epoch.global": 0.5916398713826366, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9929315476190477, "tokens_p.mean_in_band": 0.63037109375, "tokens_rate.above_band": 0.84, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16 }, { "epoch": 0.22772606382978725, "grad_norm": 203.63806184446858, "learning_rate": 1.9910351058627283e-07, "loss": 0.8259, "step": 1370, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47540983606557374, "success_rate.epoch.env.logic": 0.37333333333333335, "success_rate.epoch.env.math": 0.8269794721407625, "success_rate.epoch.env.sat": 0.05, "success_rate.epoch.env.science": 0.6314142678347935, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4979304498718317, "success_rate.epoch.global": 0.5917467948717948, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5555555555555556, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9813262195121951, "tokens_p.mean_in_band": 0.62109375, "tokens_rate.above_band": 0.845360824742268, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15463917525773196 }, { "epoch": 0.22855718085106383, "grad_norm": 253.73287973499293, "learning_rate": 1.9909560594910425e-07, "loss": 1.028, "step": 1375, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.37209302325581395, "success_rate.epoch.env.math": 0.827485380116959, "success_rate.epoch.env.sat": 0.04950495049504951, "success_rate.epoch.env.science": 0.6317103620474407, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4979165664837659, "success_rate.epoch.global": 0.5916167664670658, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.42000000000000004, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9930368922550696, "tokens_p.mean_in_band": 0.5398732311320755, "tokens_rate.above_band": 0.8853558295479126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11464417045208738 }, { "epoch": 0.2293882978723404, "grad_norm": 153.18270203047197, "learning_rate": 1.990876674219133e-07, "loss": 0.6954, "step": 1380, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.37209302325581395, "success_rate.epoch.env.math": 0.827485380116959, "success_rate.epoch.env.sat": 0.04950495049504951, "success_rate.epoch.env.science": 0.633147113594041, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4980471802607296, "success_rate.epoch.global": 0.5926809864757359, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.98681640625, "tokens_p.mean_in_band": 0.5858028017241379, "tokens_rate.above_band": 0.8152866242038217, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18471337579617833 }, { "epoch": 0.23021941489361702, "grad_norm": 796.8927197865362, "learning_rate": 1.9907969501878895e-07, "loss": 0.7867, "step": 1385, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.37209302325581395, "success_rate.epoch.env.math": 0.8260869565217391, "success_rate.epoch.env.sat": 0.04950495049504951, "success_rate.epoch.env.science": 0.6336019838809671, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49146789619374187, "success_rate.epoch.global": 0.5928571428571429, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9894105450236966, "tokens_p.mean_in_band": 0.6906550480769231, "tokens_rate.above_band": 0.9154013015184381, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08459869848156182 }, { "epoch": 0.2310505319148936, "grad_norm": 242.0445497876273, "learning_rate": 1.9907168875388026e-07, "loss": 0.918, "step": 1390, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.37209302325581395, "success_rate.epoch.env.math": 0.8260869565217391, "success_rate.epoch.env.sat": 0.04807692307692308, "success_rate.epoch.env.science": 0.634342186534898, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49140536666972406, "success_rate.epoch.global": 0.5927243969948597, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.995125, "tokens_p.mean_in_band": 0.6332535282258065, "tokens_rate.above_band": 0.8012820512820513, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1987179487179487 }, { "epoch": 0.23188164893617022, "grad_norm": 246.44527655047452, "learning_rate": 1.9906364864139638e-07, "loss": 0.8848, "step": 1395, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.3741721854304636, "success_rate.epoch.env.math": 0.8270893371757925, "success_rate.epoch.env.sat": 0.05714285714285714, "success_rate.epoch.env.science": 0.6352433764633395, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4925916091081858, "success_rate.epoch.global": 0.5940086716594403, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9904069767441861, "tokens_p.mean_in_band": 0.7499302455357143, "tokens_rate.above_band": 0.9388646288209607, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0611353711790393 }, { "epoch": 0.2327127659574468, "grad_norm": 302.4349140001914, "learning_rate": 1.9905557469560656e-07, "loss": 0.8901, "step": 1400, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.484375, "success_rate.epoch.env.logic": 0.3741721854304636, "success_rate.epoch.env.math": 0.8270893371757925, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.6365868631062002, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49336069271406463, "success_rate.epoch.global": 0.5946582875098193, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.995364010989011, "tokens_p.mean_in_band": 0.6936773255813954, "tokens_rate.above_band": 0.8943488943488943, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10565110565110565 }, { "epoch": 0.23354388297872342, "grad_norm": 118.50638264997775, "learning_rate": 1.9904746693084005e-07, "loss": 0.9304, "step": 1405, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.484375, "success_rate.epoch.env.logic": 0.375, "success_rate.epoch.env.math": 0.8275862068965517, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.6364749082007344, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49347094083995824, "success_rate.epoch.global": 0.5947533281127643, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.62, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.995365969581749, "tokens_p.mean_in_band": 0.5574776785714286, "tokens_rate.above_band": 0.9037800687285223, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09621993127147767 }, { "epoch": 0.234375, "grad_norm": 195.24173270759556, "learning_rate": 1.9903932536148616e-07, "loss": 0.9224, "step": 1410, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47692307692307695, "success_rate.epoch.env.logic": 0.3770491803278688, "success_rate.epoch.env.math": 0.828080229226361, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.6378048780487805, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49314559987894013, "success_rate.epoch.global": 0.5957861880608661, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9932491289198606, "tokens_p.mean_below_band": 5.502442945726216e-11, "tokens_p.mean_in_band": 0.7924107142857143, "tokens_rate.above_band": 0.9288025889967637, "tokens_rate.below_band": 0.003236245954692557, "tokens_rate.in_band": 0.06796116504854369 }, { "epoch": 0.23520611702127658, "grad_norm": 461.6477018176616, "learning_rate": 1.9903115000199417e-07, "loss": 0.9814, "step": 1415, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47692307692307695, "success_rate.epoch.env.logic": 0.3770491803278688, "success_rate.epoch.env.math": 0.8262108262108262, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.6381299332119005, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49300520461962977, "success_rate.epoch.global": 0.5960342146189735, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.6904761904761905, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9893543956043956, "tokens_p.mean_in_band": 0.6155133928571429, "tokens_rate.above_band": 0.8125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1875 }, { "epoch": 0.2360372340425532, "grad_norm": 198.63044446040092, "learning_rate": 1.9902294086687336e-07, "loss": 0.8357, "step": 1420, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47692307692307695, "success_rate.epoch.env.logic": 0.3758169934640523, "success_rate.epoch.env.math": 0.8267045454545454, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.6386198547215496, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4929826095186799, "success_rate.epoch.global": 0.5963551764249709, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.5238095238095238, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9985207100591716, "tokens_p.mean_below_band": 2.3401028670377855e-08, "tokens_p.mean_in_band": 0.5246448863636364, "tokens_rate.above_band": 0.9540322580645161, "tokens_rate.below_band": 0.0016129032258064516, "tokens_rate.in_band": 0.04435483870967742 }, { "epoch": 0.23686835106382978, "grad_norm": 187.49450361836384, "learning_rate": 1.990146979706929e-07, "loss": 0.9269, "step": 1425, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4117647058823529, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47692307692307695, "success_rate.epoch.env.logic": 0.3758169934640523, "success_rate.epoch.env.math": 0.8267045454545454, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.6395418927064497, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49306643115367077, "success_rate.epoch.global": 0.597061098221191, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9911317567567568, "tokens_p.mean_in_band": 0.5437729779411765, "tokens_rate.above_band": 0.8131868131868132, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18681318681318682 }, { "epoch": 0.2376994680851064, "grad_norm": 122.10865540389376, "learning_rate": 1.9900642132808195e-07, "loss": 0.8888, "step": 1430, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4057971014492754, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47692307692307695, "success_rate.epoch.env.logic": 0.37662337662337664, "success_rate.epoch.env.math": 0.8267045454545454, "success_rate.epoch.env.sat": 0.05555555555555555, "success_rate.epoch.env.science": 0.6400240384615384, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4925938596701936, "success_rate.epoch.global": 0.5969171483622351, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45999999999999996, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.999460586881473, "tokens_p.mean_in_band": 0.6142113095238095, "tokens_rate.above_band": 0.9324034334763949, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06759656652360516 }, { "epoch": 0.23853058510638298, "grad_norm": 134.6919880680028, "learning_rate": 1.989981109537295e-07, "loss": 0.8967, "step": 1435, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4057971014492754, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47692307692307695, "success_rate.epoch.env.logic": 0.37540453074433655, "success_rate.epoch.env.math": 0.8267045454545454, "success_rate.epoch.env.sat": 0.05555555555555555, "success_rate.epoch.env.science": 0.6395209580838324, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4924373209195803, "success_rate.epoch.global": 0.5964642582628747, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6904761904761904, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9968787605294825, "tokens_p.mean_in_band": 0.5983473557692308, "tokens_rate.above_band": 0.9274553571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07254464285714286 }, { "epoch": 0.2393617021276596, "grad_norm": 185.91445601308547, "learning_rate": 1.9898976686238442e-07, "loss": 0.9484, "step": 1440, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4057971014492754, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47692307692307695, "success_rate.epoch.env.logic": 0.3741935483870968, "success_rate.epoch.env.math": 0.8267045454545454, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.6388557806912991, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.492220426033608, "success_rate.epoch.global": 0.5957120980091883, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.16666666666666666, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9978805916305916, "tokens_p.mean_in_band": 0.5777064732142857, "tokens_rate.above_band": 0.9252336448598131, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07476635514018691 }, { "epoch": 0.24019281914893617, "grad_norm": 103.61172586118464, "learning_rate": 1.9898138906885548e-07, "loss": 1.0292, "step": 1445, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4057971014492754, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47692307692307695, "success_rate.epoch.env.logic": 0.3729903536977492, "success_rate.epoch.env.math": 0.8267045454545454, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.638525564803805, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49208102507207696, "success_rate.epoch.global": 0.5953381734810852, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9945739549839229, "tokens_p.mean_below_band": 4.172325134277344e-07, "tokens_p.mean_in_band": 0.5631696428571429, "tokens_rate.above_band": 0.9452887537993921, "tokens_rate.below_band": 0.001519756838905775, "tokens_rate.in_band": 0.05319148936170213 }, { "epoch": 0.24102393617021275, "grad_norm": 97.57096021798458, "learning_rate": 1.9897297758801113e-07, "loss": 0.8264, "step": 1450, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4142857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48484848484848486, "success_rate.epoch.env.logic": 0.3769968051118211, "success_rate.epoch.env.math": 0.8267045454545454, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.6384113811499703, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4939270513013572, "success_rate.epoch.global": 0.595963442498096, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9896324685534591, "tokens_p.mean_below_band": 3.213062882423401e-08, "tokens_p.mean_in_band": 0.6689749053030303, "tokens_rate.above_band": 0.9492537313432836, "tokens_rate.below_band": 0.0014925373134328358, "tokens_rate.in_band": 0.049253731343283584 }, { "epoch": 0.24185505319148937, "grad_norm": 399.4931537788219, "learning_rate": 1.9896453243477974e-07, "loss": 0.7924, "step": 1455, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4142857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48484848484848486, "success_rate.epoch.env.logic": 0.37579617834394907, "success_rate.epoch.env.math": 0.8271954674220963, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.6385115180153573, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4938716360345451, "success_rate.epoch.global": 0.5960516324981018, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 1.0001646903820818, "tokens_p.mean_in_band": 0.6193892045454545, "tokens_rate.above_band": 0.9324324324324325, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06756756756756757 }, { "epoch": 0.24268617021276595, "grad_norm": 219.41326363457327, "learning_rate": 1.989560536241494e-07, "loss": 0.8301, "step": 1460, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4142857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48484848484848486, "success_rate.epoch.env.logic": 0.37777777777777777, "success_rate.epoch.env.math": 0.8271954674220963, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.6397881106533255, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49416783531379943, "success_rate.epoch.global": 0.5971223021582733, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9897959183673469, "tokens_p.mean_below_band": 3.342393029015511e-11, "tokens_p.mean_in_band": 0.6927083333333334, "tokens_rate.above_band": 0.875, "tokens_rate.below_band": 0.004464285714285714, "tokens_rate.in_band": 0.12053571428571429 }, { "epoch": 0.24351728723404256, "grad_norm": 138.15334489710972, "learning_rate": 1.9894754117116785e-07, "loss": 0.9311, "step": 1465, "success_rate.epoch.env.abd": 0.375, "success_rate.epoch.env.agentgym:alfworld": 0.4084507042253521, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48484848484848486, "success_rate.epoch.env.logic": 0.37658227848101267, "success_rate.epoch.env.math": 0.8248587570621468, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.6392961876832844, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4932715496149705, "success_rate.epoch.global": 0.5962264150943396, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.1, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9945874183006536, "tokens_p.mean_in_band": 0.6085111177884616, "tokens_rate.above_band": 0.8547486033519553, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1452513966480447 }, { "epoch": 0.24434840425531915, "grad_norm": 142.00132860885753, "learning_rate": 1.989389950909427e-07, "loss": 0.8617, "step": 1470, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.4084507042253521, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.3785488958990536, "success_rate.epoch.env.math": 0.8253521126760563, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.6391812865497076, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49082152268942364, "success_rate.epoch.global": 0.5960887551711169, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9830729166666666, "tokens_p.mean_below_band": 2.0211564333294518e-07, "tokens_p.mean_in_band": 0.06323905323134561, "tokens_rate.above_band": 0.15209928703459202, "tokens_rate.below_band": 0.002112490097702667, "tokens_rate.in_band": 0.8457882228677053 }, { "epoch": 0.24517952127659576, "grad_norm": 200.88504001102518, "learning_rate": 1.989304153986411e-07, "loss": 1.0422, "step": 1475, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.4084507042253521, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.3805031446540881, "success_rate.epoch.env.math": 0.8263305322128851, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.6394399066511085, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49106614736024956, "success_rate.epoch.global": 0.5965504311961005, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9916516245487365, "tokens_p.mean_in_band": 0.6830929487179487, "tokens_rate.above_band": 0.8765822784810127, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12341772151898735 }, { "epoch": 0.24601063829787234, "grad_norm": 227.04335971840112, "learning_rate": 1.9892180210948992e-07, "loss": 0.9042, "step": 1480, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.4084507042253521, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38317757009345793, "success_rate.epoch.env.math": 0.8263305322128851, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.6401162790697674, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4913707653473431, "success_rate.epoch.global": 0.5971599402092675, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.6904761904761905, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.999033938172043, "tokens_p.mean_in_band": 0.5027426861702128, "tokens_rate.above_band": 0.9405815423514539, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05941845764854614 }, { "epoch": 0.24684175531914893, "grad_norm": 192.13632320292336, "learning_rate": 1.989131552387756e-07, "loss": 0.7372, "step": 1485, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.4084507042253521, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38317757009345793, "success_rate.epoch.env.math": 0.8272980501392758, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.6402085747392816, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4914671120378799, "success_rate.epoch.global": 0.5976154992548435, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9955553519061584, "tokens_p.mean_in_band": 0.5665283203125, "tokens_rate.above_band": 0.9551820728291317, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04481792717086835 }, { "epoch": 0.24767287234042554, "grad_norm": 145.0123704187015, "learning_rate": 1.9890447480184426e-07, "loss": 0.8365, "step": 1490, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.4027777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38317757009345793, "success_rate.epoch.env.math": 0.8282548476454293, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.640877598152425, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49109919335349106, "success_rate.epoch.global": 0.5982176011882658, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9985641891891892, "tokens_p.mean_in_band": 0.5862068965517241, "tokens_rate.above_band": 0.9622886866059818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0377113133940182 }, { "epoch": 0.24850398936170212, "grad_norm": 350.40903485092116, "learning_rate": 1.9889576081410156e-07, "loss": 0.8084, "step": 1495, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.4027777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38317757009345793, "success_rate.epoch.env.math": 0.8282548476454293, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.6408045977011494, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4910925569488296, "success_rate.epoch.global": 0.5982969270640504, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9998094512195121, "tokens_p.mean_in_band": 0.5777698863636364, "tokens_rate.above_band": 0.9371428571428572, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06285714285714286 }, { "epoch": 0.24933510638297873, "grad_norm": 131.929210476678, "learning_rate": 1.9888701329101264e-07, "loss": 0.8795, "step": 1500, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.4027777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38317757009345793, "success_rate.epoch.env.math": 0.8264462809917356, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.6407322654462243, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4909215661389552, "success_rate.epoch.global": 0.5983032091479159, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9814583333333333, "tokens_p.mean_in_band": 0.4888630319148936, "tokens_rate.above_band": 0.7614213197969543, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.23857868020304568 }, { "epoch": 0.2501662234042553, "grad_norm": 158.09697914328336, "learning_rate": 1.9887823224810223e-07, "loss": 0.7213, "step": 1505, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.4027777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38317757009345793, "success_rate.epoch.env.math": 0.8273972602739726, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.6408209806157354, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49097141104444136, "success_rate.epoch.global": 0.5985294117647059, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9884708737864077, "tokens_p.mean_in_band": 0.6474609375, "tokens_rate.above_band": 0.8110236220472441, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1889763779527559 }, { "epoch": 0.2509973404255319, "grad_norm": 138.87652901713895, "learning_rate": 1.9886941770095453e-07, "loss": 0.9555, "step": 1510, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.4027777777777778, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38390092879256965, "success_rate.epoch.env.math": 0.8278688524590164, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.640704945992041, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4910694943408469, "success_rate.epoch.global": 0.5986070381231672, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9948940417690417, "tokens_p.mean_below_band": 2.2851054382044822e-11, "tokens_p.mean_in_band": 0.6820790816326531, "tokens_rate.above_band": 0.9421296296296297, "tokens_rate.below_band": 0.0011574074074074073, "tokens_rate.in_band": 0.056712962962962965 }, { "epoch": 0.2518284574468085, "grad_norm": 205.53311133833452, "learning_rate": 1.9886056966521324e-07, "loss": 0.7624, "step": 1515, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3972602739726027, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38390092879256965, "success_rate.epoch.env.math": 0.8278688524590164, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.6409966024915063, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49059441731305503, "success_rate.epoch.global": 0.5986842105263158, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9957157258064516, "tokens_p.mean_in_band": 0.657421875, "tokens_rate.above_band": 0.9253731343283582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07462686567164178 }, { "epoch": 0.2526595744680851, "grad_norm": 169.47070855145245, "learning_rate": 1.9885168815658142e-07, "loss": 0.887, "step": 1520, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3972602739726027, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38390092879256965, "success_rate.epoch.env.math": 0.8278688524590164, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.6410835214446953, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49060231903607227, "success_rate.epoch.global": 0.5988329686360321, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9918241279069767, "tokens_p.mean_in_band": 0.6893279246794872, "tokens_rate.above_band": 0.8151658767772512, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1848341232227488 }, { "epoch": 0.2534906914893617, "grad_norm": 158.18996698575802, "learning_rate": 1.9884277319082154e-07, "loss": 0.8415, "step": 1525, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3972602739726027, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38390092879256965, "success_rate.epoch.env.math": 0.8288043478260869, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.6408094435075885, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4906624478933417, "success_rate.epoch.global": 0.599054889131225, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9900568181818182, "tokens_p.mean_in_band": 0.575927734375, "tokens_rate.above_band": 0.8048780487804879, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1951219512195122 }, { "epoch": 0.2543218085106383, "grad_norm": 150.50251436930847, "learning_rate": 1.9883382478375555e-07, "loss": 0.9778, "step": 1530, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3972602739726027, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38390092879256965, "success_rate.epoch.env.math": 0.8288043478260869, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.6403361344537815, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4906194197975411, "success_rate.epoch.global": 0.5988393180993834, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 0.4444444444444444, "success_rate.window.env_macro_mean": 0.2222222222222222, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9850780116110305, "tokens_p.mean_below_band": 4.1443854570388794e-08, "tokens_p.mean_in_band": 0.58267578125, "tokens_rate.above_band": 0.7741573033707865, "tokens_rate.below_band": 0.0011235955056179776, "tokens_rate.in_band": 0.2247191011235955 }, { "epoch": 0.2551529255319149, "grad_norm": 386.06829289088074, "learning_rate": 1.9882484295126466e-07, "loss": 1.1215, "step": 1535, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3972602739726027, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38271604938271603, "success_rate.epoch.env.math": 0.8297297297297297, "success_rate.epoch.env.sat": 0.05357142857142857, "success_rate.epoch.env.science": 0.640581330352152, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4905742446074987, "success_rate.epoch.global": 0.5989150090415913, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9937065972222222, "tokens_p.mean_in_band": 0.6541819852941176, "tokens_rate.above_band": 0.8495575221238938, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1504424778761062 }, { "epoch": 0.2559840425531915, "grad_norm": 514.3055939231841, "learning_rate": 1.9881582770928938e-07, "loss": 1.0413, "step": 1540, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3972602739726027, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38153846153846155, "success_rate.epoch.env.math": 0.8279569892473119, "success_rate.epoch.env.sat": 0.05357142857142857, "success_rate.epoch.env.science": 0.64086859688196, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.490332148080511, "success_rate.epoch.global": 0.5989189189189189, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 1.0002003205128205, "tokens_p.mean_in_band": 0.560791015625, "tokens_rate.above_band": 0.9285714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07142857142857142 }, { "epoch": 0.2568151595744681, "grad_norm": 360.7866453047551, "learning_rate": 1.988067790738296e-07, "loss": 0.9376, "step": 1545, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3918918918918919, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.3803680981595092, "success_rate.epoch.env.math": 0.8284182305630027, "success_rate.epoch.env.sat": 0.05357142857142857, "success_rate.epoch.env.science": 0.64, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4897006843508809, "success_rate.epoch.global": 0.5981308411214953, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.25, "success_rate.window.env_macro_mean": 0.18333333333333332, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9968513001083423, "tokens_p.mean_below_band": 8.754432201385498e-08, "tokens_p.mean_in_band": 0.5670166015625, "tokens_rate.above_band": 0.9495884773662552, "tokens_rate.below_band": 0.00102880658436214, "tokens_rate.in_band": 0.04938271604938271 }, { "epoch": 0.2576462765957447, "grad_norm": 252.78054867468754, "learning_rate": 1.987976970609444e-07, "loss": 0.7714, "step": 1550, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38666666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38109756097560976, "success_rate.epoch.env.math": 0.8293333333333334, "success_rate.epoch.env.sat": 0.05309734513274336, "success_rate.epoch.env.science": 0.6402439024390244, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4893542442928396, "success_rate.epoch.global": 0.5981375358166189, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9974118098159509, "tokens_p.mean_in_band": 0.597320556640625, "tokens_rate.above_band": 0.9532163742690059, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04678362573099415 }, { "epoch": 0.25847739361702127, "grad_norm": 211.21949877739684, "learning_rate": 1.9878858168675214e-07, "loss": 0.8431, "step": 1555, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38666666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47761194029850745, "success_rate.epoch.env.logic": 0.38109756097560976, "success_rate.epoch.env.math": 0.8293333333333334, "success_rate.epoch.env.sat": 0.05309734513274336, "success_rate.epoch.env.science": 0.6408839779005525, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4894124329711603, "success_rate.epoch.global": 0.5986418870621872, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951847484276729, "tokens_p.mean_in_band": 0.6688368055555556, "tokens_rate.above_band": 0.9464285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05357142857142857 }, { "epoch": 0.25930851063829785, "grad_norm": 224.0802653100846, "learning_rate": 1.987794329674304e-07, "loss": 0.8477, "step": 1560, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38666666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.38109756097560976, "success_rate.epoch.env.math": 0.8297872340425532, "success_rate.epoch.env.sat": 0.05309734513274336, "success_rate.epoch.env.science": 0.6407713498622589, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48880493912266354, "success_rate.epoch.global": 0.5985739750445633, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9922518298261666, "tokens_p.mean_below_band": 7.851497230149107e-13, "tokens_p.mean_in_band": 0.7527426861702128, "tokens_rate.above_band": 0.92003367003367, "tokens_rate.below_band": 0.0008417508417508417, "tokens_rate.in_band": 0.07912457912457913 }, { "epoch": 0.2601396276595745, "grad_norm": 132.72067798628524, "learning_rate": 1.9877025091921588e-07, "loss": 0.6671, "step": 1565, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38666666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.3829787234042553, "success_rate.epoch.env.math": 0.8311345646437994, "success_rate.epoch.env.sat": 0.05309734513274336, "success_rate.epoch.env.science": 0.6417582417582418, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48918815593410675, "success_rate.epoch.global": 0.5998578535891969, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9937015503875969, "tokens_p.mean_in_band": 0.7973090277777778, "tokens_rate.above_band": 0.9052631578947369, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09473684210526316 }, { "epoch": 0.2609707446808511, "grad_norm": 181.16148600362826, "learning_rate": 1.987610355584045e-07, "loss": 0.8627, "step": 1570, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38666666666666666, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.3829787234042553, "success_rate.epoch.env.math": 0.8315789473684211, "success_rate.epoch.env.sat": 0.05263157894736842, "success_rate.epoch.env.science": 0.6420361247947455, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48921147407735677, "success_rate.epoch.global": 0.600070846617074, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9932133838383839, "tokens_p.mean_below_band": 1.6916601452976465e-10, "tokens_p.mean_in_band": 0.6852678571428571, "tokens_rate.above_band": 0.8722466960352423, "tokens_rate.below_band": 0.004405286343612335, "tokens_rate.in_band": 0.12334801762114538 }, { "epoch": 0.26180186170212766, "grad_norm": 490.13935433943726, "learning_rate": 1.9875178690135123e-07, "loss": 0.8757, "step": 1575, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.39473684210526316, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.3806646525679758, "success_rate.epoch.env.math": 0.8324607329842932, "success_rate.epoch.env.sat": 0.06086956521739131, "success_rate.epoch.env.science": 0.642818132168214, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49063491806478227, "success_rate.epoch.global": 0.6007765619484645, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977602389078498, "tokens_p.mean_in_band": 0.5754123263888888, "tokens_rate.above_band": 0.9606557377049181, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03934426229508197 }, { "epoch": 0.26263297872340424, "grad_norm": 209.38634985288667, "learning_rate": 1.987425049644702e-07, "loss": 0.9739, "step": 1580, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.39473684210526316, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.3806646525679758, "success_rate.epoch.env.math": 0.8328981723237598, "success_rate.epoch.env.sat": 0.06086956521739131, "success_rate.epoch.env.science": 0.6410675381263616, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4905155403645654, "success_rate.epoch.global": 0.5998591053187742, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9938360091743119, "tokens_p.mean_below_band": 5.289912223815918e-07, "tokens_p.mean_in_band": 0.5101316689466484, "tokens_rate.above_band": 0.8168873344991257, "tokens_rate.below_band": 0.0004996252810392206, "tokens_rate.in_band": 0.18261304021983513 }, { "epoch": 0.2634640957446808, "grad_norm": 192.27175642442387, "learning_rate": 1.9873318976423458e-07, "loss": 0.7256, "step": 1585, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.39473684210526316, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.3806646525679758, "success_rate.epoch.env.math": 0.8333333333333334, "success_rate.epoch.env.sat": 0.06779661016949153, "success_rate.epoch.env.science": 0.6414991852254209, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49122407246099564, "success_rate.epoch.global": 0.6000702247191011, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.3333333333333333, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7222222222222223, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9947467672413793, "tokens_p.mean_below_band": 4.1443854570388794e-08, "tokens_p.mean_in_band": 0.67578125, "tokens_rate.above_band": 0.8405797101449275, "tokens_rate.below_band": 0.0036231884057971015, "tokens_rate.in_band": 0.15579710144927536 }, { "epoch": 0.26429521276595747, "grad_norm": 240.16106637909726, "learning_rate": 1.9872384131717652e-07, "loss": 0.7972, "step": 1590, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.39473684210526316, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.3825301204819277, "success_rate.epoch.env.math": 0.8337662337662337, "success_rate.epoch.env.sat": 0.06779661016949153, "success_rate.epoch.env.science": 0.6413867822318526, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4914227965840215, "success_rate.epoch.global": 0.6003502626970227, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9920742753623188, "tokens_p.mean_in_band": 0.5440340909090909, "tokens_rate.above_band": 0.8625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1375 }, { "epoch": 0.26512632978723405, "grad_norm": 354.5628839874318, "learning_rate": 1.987144596398873e-07, "loss": 0.9163, "step": 1595, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.39473684210526316, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.3825301204819277, "success_rate.epoch.env.math": 0.8350515463917526, "success_rate.epoch.env.sat": 0.06722689075630252, "success_rate.epoch.env.science": 0.6414686825053996, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49149529599182845, "success_rate.epoch.global": 0.6006980802792321, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9906684027777778, "tokens_p.mean_in_band": 0.5745634191176471, "tokens_rate.above_band": 0.8089887640449438, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19101123595505617 }, { "epoch": 0.26595744680851063, "grad_norm": 219.75021955239902, "learning_rate": 1.9870504474901703e-07, "loss": 0.9037, "step": 1600, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.37910447761194027, "success_rate.epoch.env.math": 0.8333333333333334, "success_rate.epoch.env.sat": 0.06722689075630252, "success_rate.epoch.env.science": 0.6413160733549083, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49054775803057643, "success_rate.epoch.global": 0.5997215454229029, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.3125, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9984931506849315, "tokens_p.mean_in_band": 0.5259588068181819, "tokens_rate.above_band": 0.9707446808510638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02925531914893617 }, { "epoch": 0.2667885638297872, "grad_norm": 157.0005335801091, "learning_rate": 1.9869559666127485e-07, "loss": 1.0439, "step": 1605, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.38095238095238093, "success_rate.epoch.env.math": 0.8337595907928389, "success_rate.epoch.env.sat": 0.06722689075630252, "success_rate.epoch.env.science": 0.6413978494623656, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49076193411306773, "success_rate.epoch.global": 0.6001388406803193, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9944401840490797, "tokens_p.mean_in_band": 0.5199776785714286, "tokens_rate.above_band": 0.8232323232323232, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17676767676767677 }, { "epoch": 0.26761968085106386, "grad_norm": 170.02996866554872, "learning_rate": 1.986861153934288e-07, "loss": 0.8706, "step": 1610, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.3827893175074184, "success_rate.epoch.env.math": 0.8350253807106599, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6414385399892646, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4909967698321697, "success_rate.epoch.global": 0.6005538248528903, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.8, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9886363636363636, "tokens_p.mean_in_band": 0.539794921875, "tokens_rate.above_band": 0.8608695652173913, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1391304347826087 }, { "epoch": 0.26845079787234044, "grad_norm": 134.0523531361425, "learning_rate": 1.9867660096230582e-07, "loss": 0.8275, "step": 1615, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.3816568047337278, "success_rate.epoch.env.math": 0.8358585858585859, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6420150053590568, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4849295290159278, "success_rate.epoch.global": 0.6004140786749482, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.35, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9913850706376479, "tokens_p.mean_below_band": 2.868473529815674e-07, "tokens_p.mean_in_band": 0.5772412025043178, "tokens_rate.above_band": 0.8186933416692717, "tokens_rate.below_band": 0.00031259768677711783, "tokens_rate.in_band": 0.18099406064395124 }, { "epoch": 0.269281914893617, "grad_norm": 203.83527746812365, "learning_rate": 1.986670533847917e-07, "loss": 0.7779, "step": 1620, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.3816568047337278, "success_rate.epoch.env.math": 0.8358585858585859, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6422851041110518, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4849540834479273, "success_rate.epoch.global": 0.6006884681583476, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9837372448979592, "tokens_p.mean_in_band": 0.524387668918919, "tokens_rate.above_band": 0.725925925925926, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2740740740740741 }, { "epoch": 0.2701130319148936, "grad_norm": 239.60967974463873, "learning_rate": 1.9865747267783107e-07, "loss": 0.7279, "step": 1625, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.3805309734513274, "success_rate.epoch.env.math": 0.8358585858585859, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6427432216905901, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48489338220221256, "success_rate.epoch.global": 0.6008922443376802, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976105697151424, "tokens_p.mean_in_band": 0.5700520833333333, "tokens_rate.above_band": 0.9569583931133429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043041606886657105 }, { "epoch": 0.2709441489361702, "grad_norm": 174.79111366182423, "learning_rate": 1.9864785885842727e-07, "loss": 1.0569, "step": 1630, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.3805309734513274, "success_rate.epoch.env.math": 0.8341708542713567, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6424403183023872, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48471241538626414, "success_rate.epoch.global": 0.6006849315068493, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9910714285714286, "tokens_p.mean_in_band": 0.5723805147058824, "tokens_rate.above_band": 0.8475336322869955, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15246636771300448 }, { "epoch": 0.27177526595744683, "grad_norm": 243.72217720344182, "learning_rate": 1.9863821194364254e-07, "loss": 0.8252, "step": 1635, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.37941176470588234, "success_rate.epoch.env.math": 0.835, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6426680783483325, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4847067514798227, "success_rate.epoch.global": 0.6009566108643662, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9909188034188035, "tokens_p.mean_below_band": 2.066371962428093e-09, "tokens_p.mean_in_band": 0.599609375, "tokens_rate.above_band": 0.924901185770751, "tokens_rate.below_band": 0.003952569169960474, "tokens_rate.in_band": 0.07114624505928854 }, { "epoch": 0.2726063829787234, "grad_norm": 134.0703756130931, "learning_rate": 1.9862853195059778e-07, "loss": 0.8195, "step": 1640, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4507042253521127, "success_rate.epoch.env.logic": 0.37941176470588234, "success_rate.epoch.env.math": 0.835, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6422163588390501, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.484080355907093, "success_rate.epoch.global": 0.6005453306066802, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9951331967213115, "tokens_p.mean_in_band": 0.5467655925829875, "tokens_rate.above_band": 0.8836310960888459, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11636890391115404 }, { "epoch": 0.2734375, "grad_norm": 150.62552403983057, "learning_rate": 1.9861881889647253e-07, "loss": 0.8853, "step": 1645, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.37719298245614036, "success_rate.epoch.env.math": 0.8329177057356608, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6421052631578947, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4843728046149097, "success_rate.epoch.global": 0.600067957866123, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.99298828125, "tokens_p.mean_below_band": 2.7008354663848877e-07, "tokens_p.mean_in_band": 0.6689165900735294, "tokens_rate.above_band": 0.9586578789694428, "tokens_rate.below_band": 0.0005991611743559018, "tokens_rate.in_band": 0.04074295985620132 }, { "epoch": 0.2742686170212766, "grad_norm": 145.90566336549276, "learning_rate": 1.986090727985052e-07, "loss": 0.7381, "step": 1650, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.37790697674418605, "success_rate.epoch.env.math": 0.8341584158415841, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6423319327731093, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48457111134301734, "success_rate.epoch.global": 0.600609756097561, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9943625710227273, "tokens_p.mean_in_band": 0.5457899305555556, "tokens_rate.above_band": 0.9513513513513514, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04864864864864865 }, { "epoch": 0.27509973404255317, "grad_norm": 130.26235127790272, "learning_rate": 1.9859929367399266e-07, "loss": 0.7372, "step": 1655, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.37790697674418605, "success_rate.epoch.env.math": 0.8349753694581281, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6420335429769392, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4846182535085059, "success_rate.epoch.global": 0.6007437457741718, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9983320251177394, "tokens_p.mean_in_band": 0.6366257440476191, "tokens_rate.above_band": 0.9381443298969072, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061855670103092786 }, { "epoch": 0.2759308510638298, "grad_norm": 178.74040015921125, "learning_rate": 1.9858948154029048e-07, "loss": 0.8405, "step": 1660, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38961038961038963, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.3786127167630058, "success_rate.epoch.env.math": 0.8357843137254902, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6419236800836383, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4847459645442223, "success_rate.epoch.global": 0.6009437141894169, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.525, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9947916666666666, "tokens_p.mean_in_band": 0.7235863095238095, "tokens_rate.above_band": 0.8813559322033898, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11864406779661017 }, { "epoch": 0.2767619680851064, "grad_norm": 279.2343264473249, "learning_rate": 1.9857963641481276e-07, "loss": 0.8266, "step": 1665, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38461538461538464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.37752161383285304, "success_rate.epoch.env.math": 0.8357843137254902, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6416666666666667, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48416931714948325, "success_rate.epoch.global": 0.6004704301075269, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.20833333333333334, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9955357142857143, "tokens_p.mean_below_band": 6.927791673660977e-13, "tokens_p.mean_in_band": 0.6085464015151515, "tokens_rate.above_band": 0.9577114427860697, "tokens_rate.below_band": 0.0012437810945273632, "tokens_rate.in_band": 0.041044776119402986 }, { "epoch": 0.277593085106383, "grad_norm": 247.05071237680056, "learning_rate": 1.985697583150322e-07, "loss": 0.9812, "step": 1670, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38461538461538464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.37752161383285304, "success_rate.epoch.env.math": 0.8365853658536585, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.6427829698857737, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4843436221810537, "success_rate.epoch.global": 0.6015415549597856, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999671052631579, "tokens_p.mean_in_band": 0.5691964285714286, "tokens_rate.above_band": 0.9421487603305785, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05785123966942149 }, { "epoch": 0.27842420212765956, "grad_norm": 119.81137439713153, "learning_rate": 1.9855984725848002e-07, "loss": 0.7041, "step": 1675, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38461538461538464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.3793103448275862, "success_rate.epoch.env.math": 0.8365853658536585, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.643190056965303, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48449315435259327, "success_rate.epoch.global": 0.6018054162487463, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9954144021739131, "tokens_p.mean_below_band": 6.07222318649292e-07, "tokens_p.mean_in_band": 0.5784143518518519, "tokens_rate.above_band": 0.8679245283018868, "tokens_rate.below_band": 0.0047169811320754715, "tokens_rate.in_band": 0.12735849056603774 }, { "epoch": 0.27925531914893614, "grad_norm": 84.33912835295705, "learning_rate": 1.9854990326274592e-07, "loss": 0.7299, "step": 1680, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38461538461538464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4657534246575342, "success_rate.epoch.env.logic": 0.3793103448275862, "success_rate.epoch.env.math": 0.8345498783454988, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.6437790397521941, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4850362076801325, "success_rate.epoch.global": 0.6022007335778593, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9915987780040734, "tokens_p.mean_in_band": 0.70654296875, "tokens_rate.above_band": 0.9388145315487572, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06118546845124283 }, { "epoch": 0.2800864361702128, "grad_norm": 147.27081177833776, "learning_rate": 1.9853992634547806e-07, "loss": 0.7638, "step": 1685, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.38461538461538464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4657534246575342, "success_rate.epoch.env.logic": 0.3793103448275862, "success_rate.epoch.env.math": 0.8353510895883777, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.6445473251028807, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48517888918863855, "success_rate.epoch.global": 0.6030585106382979, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9892086330935251, "tokens_p.mean_in_band": 0.6942349137931034, "tokens_rate.above_band": 0.8273809523809523, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17261904761904762 }, { "epoch": 0.28091755319148937, "grad_norm": 156.10964463792166, "learning_rate": 1.98529916524383e-07, "loss": 0.8599, "step": 1690, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4657534246575342, "success_rate.epoch.env.logic": 0.37822349570200575, "success_rate.epoch.env.math": 0.8357487922705314, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.6437371663244353, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4857507413208097, "success_rate.epoch.global": 0.6026533996683251, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.48, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.996755079006772, "tokens_p.mean_in_band": 0.5961334745762712, "tokens_rate.above_band": 0.9375661375661376, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06243386243386243 }, { "epoch": 0.28174867021276595, "grad_norm": 254.2391005933246, "learning_rate": 1.9851987381722575e-07, "loss": 0.9832, "step": 1695, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4657534246575342, "success_rate.epoch.env.logic": 0.37822349570200575, "success_rate.epoch.env.math": 0.8365384615384616, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.6434426229508197, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4857957527657474, "success_rate.epoch.global": 0.602780536246276, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9909274193548387, "tokens_p.mean_in_band": 0.6128472222222222, "tokens_rate.above_band": 0.8378378378378378, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16216216216216217 }, { "epoch": 0.28257978723404253, "grad_norm": 114.4961207204423, "learning_rate": 1.985097982418296e-07, "loss": 0.7428, "step": 1700, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38, "success_rate.epoch.env.math": 0.8369304556354916, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.6443536024527338, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48673202787596337, "success_rate.epoch.global": 0.6038296467481017, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9897798742138365, "tokens_p.mean_in_band": 0.7215401785714286, "tokens_rate.above_band": 0.9578313253012049, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04216867469879518 }, { "epoch": 0.2834109042553192, "grad_norm": 869.5856733452797, "learning_rate": 1.984996898160763e-07, "loss": 0.7348, "step": 1705, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38, "success_rate.epoch.env.math": 0.8357142857142857, "success_rate.epoch.env.sat": 0.06504065040650407, "success_rate.epoch.env.science": 0.6442405708460754, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.486513459366809, "success_rate.epoch.global": 0.6034879894702204, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.4222222222222222, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9924679487179487, "tokens_p.mean_in_band": 0.6404157366071429, "tokens_rate.above_band": 0.7768924302788844, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.22310756972111553 }, { "epoch": 0.28424202127659576, "grad_norm": 149.36512886203056, "learning_rate": 1.984895485579058e-07, "loss": 0.8248, "step": 1710, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.3806818181818182, "success_rate.epoch.env.math": 0.833729216152019, "success_rate.epoch.env.sat": 0.06451612903225806, "success_rate.epoch.env.science": 0.6435845213849287, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48628765734718704, "success_rate.epoch.global": 0.6026272577996716, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.08333333333333333, "success_rate.window.global": 0.125, "tokens_p.mean_above_band": 0.997469473007712, "tokens_p.mean_below_band": 1.4435499906539917e-07, "tokens_p.mean_in_band": 0.567311356707317, "tokens_rate.above_band": 0.9036004645760743, "tokens_rate.below_band": 0.0011614401858304297, "tokens_rate.in_band": 0.09523809523809523 }, { "epoch": 0.28507313829787234, "grad_norm": 149.31829693678955, "learning_rate": 1.984793744853163e-07, "loss": 0.9204, "step": 1715, "success_rate.epoch.env.abd": 0.35294117647058826, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.3806818181818182, "success_rate.epoch.env.math": 0.8349056603773585, "success_rate.epoch.env.sat": 0.06451612903225806, "success_rate.epoch.env.science": 0.6434738445911631, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48638454529551195, "success_rate.epoch.global": 0.6030134294136914, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9912280701754386, "tokens_p.mean_in_band": 0.689453125, "tokens_rate.above_band": 0.8028169014084507, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19718309859154928 }, { "epoch": 0.2859042553191489, "grad_norm": 124.19771709447896, "learning_rate": 1.9846916761636438e-07, "loss": 0.9969, "step": 1720, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38243626062322944, "success_rate.epoch.env.math": 0.8349056603773585, "success_rate.epoch.env.sat": 0.06451612903225806, "success_rate.epoch.env.science": 0.6433637284701115, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48475149831215786, "success_rate.epoch.global": 0.6029411764705882, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9910919540229886, "tokens_p.mean_in_band": 0.5250592912946429, "tokens_rate.above_band": 0.8534990189666448, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14650098103335513 }, { "epoch": 0.2867353723404255, "grad_norm": 137.96552426288633, "learning_rate": 1.9845892796916466e-07, "loss": 0.929, "step": 1725, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38243626062322944, "success_rate.epoch.env.math": 0.8356807511737089, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.6439049064238745, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4847780622403323, "success_rate.epoch.global": 0.6031953048581676, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9943612258953168, "tokens_p.mean_in_band": 0.6978889627659575, "tokens_rate.above_band": 0.9391979301423027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06080206985769728 }, { "epoch": 0.28756648936170215, "grad_norm": 164.31520923456802, "learning_rate": 1.9844865556189002e-07, "loss": 0.8119, "step": 1730, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38309859154929576, "success_rate.epoch.env.math": 0.8356807511737089, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.6439393939393939, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4848414093713856, "success_rate.epoch.global": 0.6031901041666666, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.6444444444444445, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9974747474747475, "tokens_p.mean_in_band": 0.6173295454545454, "tokens_rate.above_band": 0.9557522123893806, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04424778761061947 }, { "epoch": 0.28839760638297873, "grad_norm": 150.14807994498668, "learning_rate": 1.9843835041277148e-07, "loss": 0.7307, "step": 1735, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38309859154929576, "success_rate.epoch.env.math": 0.8360655737704918, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.6445115810674723, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4849284102554638, "success_rate.epoch.global": 0.6037674569665475, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951400501672241, "tokens_p.mean_in_band": 0.6222098214285714, "tokens_rate.above_band": 0.9660743134087237, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033925686591276254 }, { "epoch": 0.2892287234042553, "grad_norm": 243.48793304606505, "learning_rate": 1.9842801254009814e-07, "loss": 0.8648, "step": 1740, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38375350140056025, "success_rate.epoch.env.math": 0.8364485981308412, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.6445783132530121, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4850288344733869, "success_rate.epoch.global": 0.6039507772020726, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9951238127544098, "tokens_p.mean_in_band": 0.5689453125, "tokens_rate.above_band": 0.9485199485199485, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05148005148005148 }, { "epoch": 0.2900598404255319, "grad_norm": 196.45183014909537, "learning_rate": 1.9841764196221722e-07, "loss": 0.6999, "step": 1745, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38375350140056025, "success_rate.epoch.env.math": 0.8368298368298368, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.6446115288220552, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48506651213411767, "success_rate.epoch.global": 0.6041397153945667, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9866803278688525, "tokens_p.mean_in_band": 0.5878276209677419, "tokens_rate.above_band": 0.7973856209150327, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.20261437908496732 }, { "epoch": 0.2908909574468085, "grad_norm": 311.443971821392, "learning_rate": 1.9840723869753396e-07, "loss": 0.9836, "step": 1750, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38375350140056025, "success_rate.epoch.env.math": 0.8375870069605569, "success_rate.epoch.env.sat": 0.06299212598425197, "success_rate.epoch.env.science": 0.645, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48512521247964985, "success_rate.epoch.global": 0.604516129032258, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9888535031847133, "tokens_p.mean_in_band": 0.6088541666666667, "tokens_rate.above_band": 0.839572192513369, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16042780748663102 }, { "epoch": 0.2917220744680851, "grad_norm": 134.917482294454, "learning_rate": 1.9839680276451155e-07, "loss": 0.7225, "step": 1755, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38440111420612816, "success_rate.epoch.env.math": 0.8379629629629629, "success_rate.epoch.env.sat": 0.06299212598425197, "success_rate.epoch.env.science": 0.6443890274314215, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4851627212286857, "success_rate.epoch.global": 0.6042471042471043, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6904761904761904, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9990768094534712, "tokens_p.mean_in_band": 0.459375, "tokens_rate.above_band": 0.9312242090784044, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0687757909215956 }, { "epoch": 0.2925531914893617, "grad_norm": 133.6640868994498, "learning_rate": 1.983863341816712e-07, "loss": 0.7707, "step": 1760, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.3861111111111111, "success_rate.epoch.env.math": 0.8379629629629629, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.6437810945273632, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48521817013929236, "success_rate.epoch.global": 0.603852327447833, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9927685950413223, "tokens_p.mean_in_band": 0.6369850852272727, "tokens_rate.above_band": 0.8461538461538461, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15384615384615385 }, { "epoch": 0.2933843085106383, "grad_norm": 169.19694821539974, "learning_rate": 1.9837583296759218e-07, "loss": 0.8373, "step": 1765, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.3878116343490305, "success_rate.epoch.env.math": 0.836405529953917, "success_rate.epoch.env.sat": 0.06201550387596899, "success_rate.epoch.env.science": 0.6439920556107249, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4776305539442806, "success_rate.epoch.global": 0.6037131882202305, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45999999999999996, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9961905991735537, "tokens_p.mean_in_band": 0.6551192434210527, "tokens_rate.above_band": 0.9272030651340997, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07279693486590039 }, { "epoch": 0.2942154255319149, "grad_norm": 175.58074786393445, "learning_rate": 1.9836529914091152e-07, "loss": 1.0126, "step": 1770, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.3924050632911392, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.3878116343490305, "success_rate.epoch.env.math": 0.8375286041189931, "success_rate.epoch.env.sat": 0.06201550387596899, "success_rate.epoch.env.science": 0.6450495049504951, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4778287833538121, "success_rate.epoch.global": 0.6048515799553144, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9898298816568047, "tokens_p.mean_in_band": 0.75375, "tokens_rate.above_band": 0.8711340206185567, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12886597938144329 }, { "epoch": 0.2950465425531915, "grad_norm": 225.79861534910873, "learning_rate": 1.9835473272032416e-07, "loss": 0.8841, "step": 1775, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.38950276243093923, "success_rate.epoch.env.math": 0.8390022675736961, "success_rate.epoch.env.sat": 0.06153846153846154, "success_rate.epoch.env.science": 0.6454005934718101, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.47879549012011036, "success_rate.epoch.global": 0.6056651814131127, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954427083333334, "tokens_p.mean_in_band": 0.69140625, "tokens_rate.above_band": 0.9411764705882353, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058823529411764705 }, { "epoch": 0.2958776595744681, "grad_norm": 396.33425728135785, "learning_rate": 1.9834413372458295e-07, "loss": 1.061, "step": 1780, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.3917808219178082, "success_rate.epoch.env.math": 0.8390022675736961, "success_rate.epoch.env.sat": 0.06153846153846154, "success_rate.epoch.env.science": 0.6451135241855873, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.47897648922926, "success_rate.epoch.global": 0.6055890758971102, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.38888888888888884, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9973275370581528, "tokens_p.mean_in_band": 0.5534939236111112, "tokens_rate.above_band": 0.9605695509309967, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03943044906900329 }, { "epoch": 0.2967087765957447, "grad_norm": 181.6838301079621, "learning_rate": 1.9833350217249847e-07, "loss": 0.8669, "step": 1785, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.39402173913043476, "success_rate.epoch.env.math": 0.8390022675736961, "success_rate.epoch.env.sat": 0.06153846153846154, "success_rate.epoch.env.science": 0.645320197044335, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4791989974175667, "success_rate.epoch.global": 0.605830164765526, "success_rate.window.env.logic": 0.4, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9985522598870057, "tokens_p.mean_in_band": 0.5005008012820513, "tokens_rate.above_band": 0.9577922077922078, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04220779220779221 }, { "epoch": 0.29753989361702127, "grad_norm": 144.47263518111075, "learning_rate": 1.9832283808293913e-07, "loss": 0.8329, "step": 1790, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.39402173913043476, "success_rate.epoch.env.math": 0.8393665158371041, "success_rate.epoch.env.sat": 0.06153846153846154, "success_rate.epoch.env.science": 0.6463654223968566, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4793271313826512, "success_rate.epoch.global": 0.6067024976288334, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9906352908891328, "tokens_p.mean_in_band": 0.6921027131782945, "tokens_rate.above_band": 0.8759615384615385, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12403846153846154 }, { "epoch": 0.29837101063829785, "grad_norm": 258.9570646023226, "learning_rate": 1.983121414748311e-07, "loss": 0.7325, "step": 1795, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.39402173913043476, "success_rate.epoch.env.math": 0.8400900900900901, "success_rate.epoch.env.sat": 0.061068702290076333, "success_rate.epoch.env.science": 0.6474045053868757, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.47944466756398024, "success_rate.epoch.global": 0.6075031525851198, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963565022421524, "tokens_p.mean_below_band": 4.843059286940843e-11, "tokens_p.mean_in_band": 0.6871995192307693, "tokens_rate.above_band": 0.892, "tokens_rate.below_band": 0.004, "tokens_rate.in_band": 0.104 }, { "epoch": 0.2992021276595745, "grad_norm": 117.93606495434574, "learning_rate": 1.983014123671582e-07, "loss": 0.737, "step": 1800, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.39295392953929537, "success_rate.epoch.env.math": 0.8400900900900901, "success_rate.epoch.env.sat": 0.06818181818181818, "success_rate.epoch.env.science": 0.6484375, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4800881494652282, "success_rate.epoch.global": 0.6081761006289308, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971509971509972, "tokens_p.mean_in_band": 0.5111083984375, "tokens_rate.above_band": 0.9564032697547684, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043596730245231606 }, { "epoch": 0.3000332446808511, "grad_norm": 165.55263056487317, "learning_rate": 1.9829065077896203e-07, "loss": 0.8661, "step": 1805, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.39622641509433965, "success_rate.epoch.env.math": 0.8400900900900901, "success_rate.epoch.env.sat": 0.07462686567164178, "success_rate.epoch.env.science": 0.6481481481481481, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4809452568464115, "success_rate.epoch.global": 0.6082183186951067, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.725, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9952512254901961, "tokens_p.mean_in_band": 0.6074683779761905, "tokens_rate.above_band": 0.9066666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09333333333333334 }, { "epoch": 0.30086436170212766, "grad_norm": 168.1160135624176, "learning_rate": 1.9827985672934172e-07, "loss": 0.9137, "step": 1810, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.3951612903225806, "success_rate.epoch.env.math": 0.8400900900900901, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.6488326848249028, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4815338048204874, "success_rate.epoch.global": 0.6084507042253521, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999292071197411, "tokens_p.mean_in_band": 0.598876953125, "tokens_rate.above_band": 0.9507692307692308, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04923076923076923 }, { "epoch": 0.30169547872340424, "grad_norm": 267.5530777460492, "learning_rate": 1.9826903023745408e-07, "loss": 0.9431, "step": 1815, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.3983957219251337, "success_rate.epoch.env.math": 0.8400900900900901, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.6482290150412421, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48119966441254064, "success_rate.epoch.global": 0.608179831408055, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.998049303655807, "tokens_p.mean_below_band": 2.477318048477173e-07, "tokens_p.mean_in_band": 0.468992761299435, "tokens_rate.above_band": 0.9569252736792003, "tokens_rate.below_band": 0.0009519276534983341, "tokens_rate.in_band": 0.04212279866730129 }, { "epoch": 0.3025265957446808, "grad_norm": 138.39670036359678, "learning_rate": 1.9825817132251345e-07, "loss": 0.8651, "step": 1820, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.4, "success_rate.epoch.env.math": 0.8404494382022472, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.6484526112185687, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48139850280929974, "success_rate.epoch.global": 0.6086550435865504, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.7380952380952381, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9938599706744868, "tokens_p.mean_in_band": 0.55126953125, "tokens_rate.above_band": 0.9393939393939394, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06060606060606061 }, { "epoch": 0.30335771276595747, "grad_norm": 168.03979056395144, "learning_rate": 1.982472800037918e-07, "loss": 0.9215, "step": 1825, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.39893617021276595, "success_rate.epoch.env.math": 0.8404494382022472, "success_rate.epoch.env.sat": 0.08029197080291971, "success_rate.epoch.env.science": 0.6493005306319344, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48194413793247953, "success_rate.epoch.global": 0.6090062111801242, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979910714285715, "tokens_p.mean_in_band": 0.52734375, "tokens_rate.above_band": 0.9186351706036745, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08136482939632546 }, { "epoch": 0.30418882978723405, "grad_norm": 211.80256465400694, "learning_rate": 1.982363563006185e-07, "loss": 0.8253, "step": 1830, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.40105540897097625, "success_rate.epoch.env.math": 0.8404494382022472, "success_rate.epoch.env.sat": 0.07971014492753623, "success_rate.epoch.env.science": 0.6490129995185363, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48205776354788216, "success_rate.epoch.global": 0.6087360594795539, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.38888888888888884, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9948738170347003, "tokens_p.mean_in_band": 0.6034740691489362, "tokens_rate.above_band": 0.8708791208791209, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12912087912087913 }, { "epoch": 0.30501994680851063, "grad_norm": 136.09499319463293, "learning_rate": 1.982254002323805e-07, "loss": 0.6994, "step": 1835, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.40105540897097625, "success_rate.epoch.env.math": 0.8408071748878924, "success_rate.epoch.env.sat": 0.07971014492753623, "success_rate.epoch.env.science": 0.6487265737626141, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48206424635967515, "success_rate.epoch.global": 0.6087225487163626, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9996576682316118, "tokens_p.mean_in_band": 0.5874763257575758, "tokens_rate.above_band": 0.9508928571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049107142857142856 }, { "epoch": 0.3058510638297872, "grad_norm": 173.3700097091341, "learning_rate": 1.982144118185221e-07, "loss": 0.7882, "step": 1840, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.4026315789473684, "success_rate.epoch.env.math": 0.8408071748878924, "success_rate.epoch.env.sat": 0.07857142857142857, "success_rate.epoch.env.science": 0.6489208633093525, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4821216775566772, "success_rate.epoch.global": 0.608641975308642, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.4444444444444445, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9930227987421384, "tokens_p.mean_in_band": 0.6687360491071429, "tokens_rate.above_band": 0.8502673796791443, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1497326203208556 }, { "epoch": 0.30668218085106386, "grad_norm": 261.1019482562245, "learning_rate": 1.9820339107854505e-07, "loss": 0.8301, "step": 1845, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.40575916230366493, "success_rate.epoch.env.math": 0.8408071748878924, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.649593106749641, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48236651273463643, "success_rate.epoch.global": 0.6089901477832512, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9955218068535826, "tokens_p.mean_in_band": 0.5590277777777778, "tokens_rate.above_band": 0.856, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.144 }, { "epoch": 0.30751329787234044, "grad_norm": 383.5429073159267, "learning_rate": 1.9819233803200846e-07, "loss": 0.9382, "step": 1850, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.40575916230366493, "success_rate.epoch.env.math": 0.8408071748878924, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.6494992846924177, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.482357983456707, "success_rate.epoch.global": 0.609029484029484, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9884615384615385, "tokens_p.mean_in_band": 0.5446428571428571, "tokens_rate.above_band": 0.8227848101265823, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17721518987341772 }, { "epoch": 0.308344414893617, "grad_norm": 171.78647100768598, "learning_rate": 1.9818125269852878e-07, "loss": 0.7666, "step": 1855, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4024390243902439, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.4073107049608355, "success_rate.epoch.env.math": 0.8411633109619687, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.6496906235126131, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48209713205073235, "success_rate.epoch.global": 0.6092552865461232, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9934623430962343, "tokens_p.mean_in_band": 0.7020596590909091, "tokens_rate.above_band": 0.9157088122605364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0842911877394636 }, { "epoch": 0.3091755319148936, "grad_norm": 117.46381511606609, "learning_rate": 1.9817013509777976e-07, "loss": 0.6933, "step": 1860, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.40963855421686746, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.40625, "success_rate.epoch.env.math": 0.8411633109619687, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.6503795066413662, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48271783277750885, "success_rate.epoch.global": 0.6097188264058679, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0006325910931173, "tokens_p.mean_in_band": 0.58859375, "tokens_rate.above_band": 0.9367888748419722, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0632111251580278 }, { "epoch": 0.3100066489361702, "grad_norm": 237.6914534637881, "learning_rate": 1.9815898524949244e-07, "loss": 0.874, "step": 1865, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.40963855421686746, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.40625, "success_rate.epoch.env.math": 0.84, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.6502602934216754, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4826012396700852, "success_rate.epoch.global": 0.6097560975609756, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9833333333333333, "tokens_p.mean_in_band": 0.6727764423076923, "tokens_rate.above_band": 0.8015267175572519, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1984732824427481 }, { "epoch": 0.31083776595744683, "grad_norm": 264.4057550452638, "learning_rate": 1.9814780317345504e-07, "loss": 0.8081, "step": 1870, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.40963855421686746, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.4051948051948052, "success_rate.epoch.env.math": 0.84, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.6507786691835772, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4825524379388767, "success_rate.epoch.global": 0.609978703985397, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.995583717357911, "tokens_p.mean_in_band": 0.6124674479166666, "tokens_rate.above_band": 0.9313304721030042, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06866952789699571 }, { "epoch": 0.3116688829787234, "grad_norm": 153.1849065261085, "learning_rate": 1.9813658888951304e-07, "loss": 0.7847, "step": 1875, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4235294117647059, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.4051948051948052, "success_rate.epoch.env.math": 0.8403547671840355, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.6516007532956686, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4839222296519645, "success_rate.epoch.global": 0.6109256449165402, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980283911671924, "tokens_p.mean_in_band": 0.6779399671052632, "tokens_rate.above_band": 0.9804123711340206, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01958762886597938 }, { "epoch": 0.3125, "grad_norm": 130.037019408199, "learning_rate": 1.9812534241756898e-07, "loss": 0.7954, "step": 1880, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4235294117647059, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.4051948051948052, "success_rate.epoch.env.math": 0.8407079646017699, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.651949271958666, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4839860220229401, "success_rate.epoch.global": 0.6113299000302939, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9950208649468892, "tokens_p.mean_in_band": 0.553203125, "tokens_rate.above_band": 0.9294781382228491, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07052186177715092 }, { "epoch": 0.3133311170212766, "grad_norm": 214.9477828823179, "learning_rate": 1.981140637775827e-07, "loss": 0.8792, "step": 1885, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4235294117647059, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4666666666666667, "success_rate.epoch.env.logic": 0.4051948051948052, "success_rate.epoch.env.math": 0.8410596026490066, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.6515222482435598, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4839791687804065, "success_rate.epoch.global": 0.6112454655380894, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9889112903225806, "tokens_p.mean_in_band": 0.5125558035714286, "tokens_rate.above_band": 0.8157894736842105, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18421052631578946 }, { "epoch": 0.31416223404255317, "grad_norm": 185.33333393201696, "learning_rate": 1.9810275298957092e-07, "loss": 0.9321, "step": 1890, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4235294117647059, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4605263157894737, "success_rate.epoch.env.logic": 0.40414507772020725, "success_rate.epoch.env.math": 0.8417582417582418, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.6510757717492984, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4833484491679686, "success_rate.epoch.global": 0.6108597285067874, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9985274869109948, "tokens_p.mean_in_band": 0.5329296875, "tokens_rate.above_band": 0.9683143219264893, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031685678073510776 }, { "epoch": 0.3149933510638298, "grad_norm": 139.5118804210538, "learning_rate": 1.9809141007360754e-07, "loss": 0.7876, "step": 1895, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4235294117647059, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4605263157894737, "success_rate.epoch.env.logic": 0.40414507772020725, "success_rate.epoch.env.math": 0.8427947598253275, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.65142323845077, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4834742659651102, "success_rate.epoch.global": 0.6114956364730665, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9864665354330708, "tokens_p.mean_in_band": 0.6157670454545454, "tokens_rate.above_band": 0.9202898550724637, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07971014492753623 }, { "epoch": 0.3158244680851064, "grad_norm": 207.0388021716609, "learning_rate": 1.9808003504982352e-07, "loss": 0.8993, "step": 1900, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4186046511627907, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4605263157894737, "success_rate.epoch.env.logic": 0.40414507772020725, "success_rate.epoch.env.math": 0.8431372549019608, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.6520726595249184, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48306748812325107, "success_rate.epoch.global": 0.6117117117117117, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9947183098591549, "tokens_p.mean_in_band": 0.5027817234848485, "tokens_rate.above_band": 0.9281045751633987, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0718954248366013 }, { "epoch": 0.316655585106383, "grad_norm": 592.2308993935935, "learning_rate": 1.9806862793840667e-07, "loss": 0.7883, "step": 1905, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4186046511627907, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.375, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.40359897172236503, "success_rate.epoch.env.math": 0.841648590021692, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.652396463471382, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4823682310163722, "success_rate.epoch.global": 0.6114439784301977, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.36666666666666664, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.99769079345851, "tokens_p.mean_below_band": 1.9190338207408786e-10, "tokens_p.mean_in_band": 0.653076171875, "tokens_rate.above_band": 0.9711764705882353, "tokens_rate.below_band": 0.000588235294117647, "tokens_rate.in_band": 0.02823529411764706 }, { "epoch": 0.31748670212765956, "grad_norm": 131.64471110232353, "learning_rate": 1.9805718875960181e-07, "loss": 0.6776, "step": 1910, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4186046511627907, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.40359897172236503, "success_rate.epoch.env.math": 0.8423326133909287, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.6516488620529494, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.47921030031429446, "success_rate.epoch.global": 0.6111775254034668, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9975679916317992, "tokens_p.mean_below_band": 1.1874362826347351e-08, "tokens_p.mean_in_band": 0.534545216480447, "tokens_rate.above_band": 0.8690909090909091, "tokens_rate.below_band": 0.0007272727272727272, "tokens_rate.in_band": 0.13018181818181818 }, { "epoch": 0.31831781914893614, "grad_norm": 112.29823107983792, "learning_rate": 1.9804571753371078e-07, "loss": 0.9103, "step": 1915, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.42528735632183906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.4025641025641026, "success_rate.epoch.env.math": 0.843010752688172, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.6516697588126159, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4803700392659265, "success_rate.epoch.global": 0.6115086463923673, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982130984042553, "tokens_p.mean_below_band": 5.3085386753082275e-08, "tokens_p.mean_in_band": 0.5998186383928571, "tokens_rate.above_band": 0.9628681177976952, "tokens_rate.below_band": 0.0012804097311139564, "tokens_rate.in_band": 0.03585147247119078 }, { "epoch": 0.3191489361702128, "grad_norm": 617.4480033444044, "learning_rate": 1.9803421428109209e-07, "loss": 0.7003, "step": 1920, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42528735632183906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.4040920716112532, "success_rate.epoch.env.math": 0.8436830835117773, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.6524757056918093, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4838331270884897, "success_rate.epoch.global": 0.6125483199524234, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9895833333333334, "tokens_p.mean_in_band": 0.6716056034482759, "tokens_rate.above_band": 0.903010033444816, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09698996655518395 }, { "epoch": 0.31998005319148937, "grad_norm": 175.8314906467214, "learning_rate": 1.9802267902216124e-07, "loss": 0.7462, "step": 1925, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42045454545454547, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.40561224489795916, "success_rate.epoch.env.math": 0.8443496801705757, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.6520332717190388, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4835523566435296, "success_rate.epoch.global": 0.6124629080118694, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.65, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9935810810810811, "tokens_p.mean_in_band": 0.7121744791666667, "tokens_rate.above_band": 0.925, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.075 }, { "epoch": 0.32081117021276595, "grad_norm": 174.71926594610449, "learning_rate": 1.9801111177739047e-07, "loss": 0.9158, "step": 1930, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42045454545454547, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46835443037974683, "success_rate.epoch.env.logic": 0.40458015267175573, "success_rate.epoch.env.math": 0.8446808510638298, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.65283540802213, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4841811915354776, "success_rate.epoch.global": 0.613084665482534, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.993834279630944, "tokens_p.mean_in_band": 0.6550399116847826, "tokens_rate.above_band": 0.8844946641556811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1155053358443189 }, { "epoch": 0.32164228723404253, "grad_norm": 223.9156515493342, "learning_rate": 1.9799951256730882e-07, "loss": 0.8614, "step": 1935, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42045454545454547, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.40458015267175573, "success_rate.epoch.env.math": 0.8453389830508474, "success_rate.epoch.env.sat": 0.08275862068965517, "success_rate.epoch.env.science": 0.6531738730450782, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4848236873524358, "success_rate.epoch.global": 0.6135222911130794, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953931203931204, "tokens_p.mean_in_band": 0.6908967391304348, "tokens_rate.above_band": 0.9465116279069767, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053488372093023255 }, { "epoch": 0.3224734042553192, "grad_norm": 218.20803178592098, "learning_rate": 1.9798788141250203e-07, "loss": 0.9407, "step": 1940, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42045454545454547, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.4035532994923858, "success_rate.epoch.env.math": 0.8456659619450317, "success_rate.epoch.env.sat": 0.0821917808219178, "success_rate.epoch.env.science": 0.6531924666972898, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4847102218523712, "success_rate.epoch.global": 0.6133215443560271, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.2333333333333333, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9949742268041237, "tokens_p.mean_in_band": 0.6322544642857143, "tokens_rate.above_band": 0.885036496350365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11496350364963503 }, { "epoch": 0.32330452127659576, "grad_norm": 154.42341866073517, "learning_rate": 1.9797621833361263e-07, "loss": 0.9645, "step": 1945, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42045454545454547, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.4035532994923858, "success_rate.epoch.env.math": 0.8456659619450317, "success_rate.epoch.env.sat": 0.08163265306122448, "success_rate.epoch.env.science": 0.6535288725939505, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48468997441018646, "success_rate.epoch.global": 0.6134157105030892, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.39285714285714285, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9960365853658537, "tokens_p.mean_in_band": 0.3281324546755725, "tokens_rate.above_band": 0.701254275940707, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.29874572405929306 }, { "epoch": 0.32413563829787234, "grad_norm": 176.34030738672908, "learning_rate": 1.979645233513396e-07, "loss": 0.7489, "step": 1950, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42045454545454547, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.4050632911392405, "success_rate.epoch.env.math": 0.8459915611814346, "success_rate.epoch.env.sat": 0.08163265306122448, "success_rate.epoch.env.science": 0.654337899543379, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48493039421315787, "success_rate.epoch.global": 0.6142563801701378, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9910402097902098, "tokens_p.mean_in_band": 0.5890625, "tokens_rate.above_band": 0.8773006134969326, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12269938650306748 }, { "epoch": 0.3249667553191489, "grad_norm": 126.23954534188171, "learning_rate": 1.9795279648643874e-07, "loss": 0.809, "step": 1955, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.40404040404040403, "success_rate.epoch.env.math": 0.8459915611814346, "success_rate.epoch.env.sat": 0.08163265306122448, "success_rate.epoch.env.science": 0.6548269581056466, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48547384131713217, "success_rate.epoch.global": 0.6145741878841089, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9994544887780549, "tokens_p.mean_in_band": 0.6108141447368421, "tokens_rate.above_band": 0.9547619047619048, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04523809523809524 }, { "epoch": 0.3257978723404255, "grad_norm": 180.16353002658522, "learning_rate": 1.9794103775972237e-07, "loss": 0.7907, "step": 1960, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.40554156171284633, "success_rate.epoch.env.math": 0.8459915611814346, "success_rate.epoch.env.sat": 0.08163265306122448, "success_rate.epoch.env.science": 0.6548592188919165, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4856132429951969, "success_rate.epoch.global": 0.6147780373831776, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.995677537250492, "tokens_p.mean_in_band": 0.5518613565488566, "tokens_rate.above_band": 0.8808816245666171, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11911837543338286 }, { "epoch": 0.32662898936170215, "grad_norm": 164.9703329127884, "learning_rate": 1.9792924719205932e-07, "loss": 0.7462, "step": 1965, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.40554156171284633, "success_rate.epoch.env.math": 0.8466386554621849, "success_rate.epoch.env.sat": 0.08783783783783784, "success_rate.epoch.env.science": 0.655328798185941, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48627886648168667, "success_rate.epoch.global": 0.6154518950437318, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9901620370370371, "tokens_p.mean_below_band": 2.648448571562767e-09, "tokens_p.mean_in_band": 0.7585227272727273, "tokens_rate.above_band": 0.9310344827586207, "tokens_rate.below_band": 0.005747126436781609, "tokens_rate.in_band": 0.06321839080459771 }, { "epoch": 0.32746010638297873, "grad_norm": 461.95040314697394, "learning_rate": 1.9791742480437498e-07, "loss": 0.5795, "step": 1970, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.4045226130653266, "success_rate.epoch.env.math": 0.8466386554621849, "success_rate.epoch.env.sat": 0.08783783783783784, "success_rate.epoch.env.science": 0.6561085972850679, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48625712561365103, "success_rate.epoch.global": 0.6158323632130385, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953342013888888, "tokens_p.mean_in_band": 0.6043113425925926, "tokens_rate.above_band": 0.9552238805970149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04477611940298507 }, { "epoch": 0.3282912234042553, "grad_norm": 98.41478633184958, "learning_rate": 1.9790557061765116e-07, "loss": 0.8591, "step": 1975, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.40601503759398494, "success_rate.epoch.env.math": 0.8466386554621849, "success_rate.epoch.env.sat": 0.08783783783783784, "success_rate.epoch.env.science": 0.6565884476534296, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4864364233315619, "success_rate.epoch.global": 0.6163229741504502, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9995124113475178, "tokens_p.mean_in_band": 0.6399305555555556, "tokens_rate.above_band": 0.94, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06 }, { "epoch": 0.3291223404255319, "grad_norm": 155.1278879482167, "learning_rate": 1.9789368465292617e-07, "loss": 0.8568, "step": 1980, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.475, "success_rate.epoch.env.logic": 0.40601503759398494, "success_rate.epoch.env.math": 0.8469601677148847, "success_rate.epoch.env.sat": 0.08783783783783784, "success_rate.epoch.env.science": 0.656770130454341, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48648216833643565, "success_rate.epoch.global": 0.6166328600405679, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.6666666666666666, "tokens_p.mean_above_band": 0.9971676737160121, "tokens_p.mean_in_band": 0.6021075581395349, "tokens_rate.above_band": 0.9390070921985816, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06099290780141844 }, { "epoch": 0.3299534574468085, "grad_norm": 129.8109960608089, "learning_rate": 1.978817669312947e-07, "loss": 0.72, "step": 1985, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48148148148148145, "success_rate.epoch.env.logic": 0.40601503759398494, "success_rate.epoch.env.math": 0.8472803347280334, "success_rate.epoch.env.sat": 0.08783783783783784, "success_rate.epoch.env.science": 0.6567834681042228, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4871017125313913, "success_rate.epoch.global": 0.6168981481481481, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.99528125, "tokens_p.mean_in_band": 0.5909288194444444, "tokens_rate.above_band": 0.9652509652509652, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03474903474903475 }, { "epoch": 0.3307845744680851, "grad_norm": 104.26200129000952, "learning_rate": 1.9786981747390774e-07, "loss": 0.6822, "step": 1990, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48148148148148145, "success_rate.epoch.env.logic": 0.40601503759398494, "success_rate.epoch.env.math": 0.8475991649269311, "success_rate.epoch.env.sat": 0.087248322147651, "success_rate.epoch.env.science": 0.6577060931899642, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48716097976725065, "success_rate.epoch.global": 0.6174942263279446, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.990506329113924, "tokens_p.mean_in_band": 0.6486979166666667, "tokens_rate.above_band": 0.8404255319148937, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1595744680851064 }, { "epoch": 0.3316156914893617, "grad_norm": 159.86115952178918, "learning_rate": 1.9785783630197266e-07, "loss": 0.7802, "step": 1995, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48148148148148145, "success_rate.epoch.env.logic": 0.405, "success_rate.epoch.env.math": 0.8475991649269311, "success_rate.epoch.env.sat": 0.08666666666666667, "success_rate.epoch.env.science": 0.6578829834747655, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48703190678632624, "success_rate.epoch.global": 0.6173337172473365, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9964025658807212, "tokens_p.mean_below_band": 7.566995918750763e-10, "tokens_p.mean_in_band": 0.6957720588235294, "tokens_rate.above_band": 0.9327296248382924, "tokens_rate.below_band": 0.00129366106080207, "tokens_rate.in_band": 0.06597671410090557 }, { "epoch": 0.3324468085106383, "grad_norm": 134.60845839540934, "learning_rate": 1.9784582343675314e-07, "loss": 0.9367, "step": 2000, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47560975609756095, "success_rate.epoch.env.logic": 0.40648379052369077, "success_rate.epoch.env.math": 0.8479166666666667, "success_rate.epoch.env.sat": 0.08666666666666667, "success_rate.epoch.env.science": 0.6583518930957684, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4867044955590998, "success_rate.epoch.global": 0.6177484204480184, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9941907051282052, "tokens_p.mean_below_band": 1.545430450278218e-13, "tokens_p.mean_in_band": 0.7065972222222222, "tokens_rate.above_band": 0.9425981873111783, "tokens_rate.below_band": 0.0030211480362537764, "tokens_rate.in_band": 0.054380664652567974 }, { "epoch": 0.3332779255319149, "grad_norm": 144.5416703421745, "learning_rate": 1.97833778899569e-07, "loss": 0.8982, "step": 2005, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47560975609756095, "success_rate.epoch.env.logic": 0.40648379052369077, "success_rate.epoch.env.math": 0.8482328482328483, "success_rate.epoch.env.sat": 0.08666666666666667, "success_rate.epoch.env.science": 0.6589698046181173, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48678941311260254, "success_rate.epoch.global": 0.6183381088825215, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9868055555555556, "tokens_p.mean_in_band": 0.65640625, "tokens_rate.above_band": 0.84375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15625 }, { "epoch": 0.3341090425531915, "grad_norm": 193.94001096760982, "learning_rate": 1.9782170271179645e-07, "loss": 0.939, "step": 2010, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.42696629213483145, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47560975609756095, "success_rate.epoch.env.logic": 0.4034653465346535, "success_rate.epoch.env.math": 0.8485477178423236, "success_rate.epoch.env.sat": 0.08609271523178808, "success_rate.epoch.env.science": 0.6594235033259424, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4865327015574557, "success_rate.epoch.global": 0.6180674671240709, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.25, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9938650306748467, "tokens_p.mean_in_band": 0.6032968101659751, "tokens_rate.above_band": 0.8256150506512301, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17438494934876989 }, { "epoch": 0.3349401595744681, "grad_norm": 156.6257582971536, "learning_rate": 1.978095948948677e-07, "loss": 0.6731, "step": 2015, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4222222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47560975609756095, "success_rate.epoch.env.logic": 0.4049382716049383, "success_rate.epoch.env.math": 0.8488612836438924, "success_rate.epoch.env.sat": 0.08552631578947369, "success_rate.epoch.env.science": 0.6597345132743363, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4862406135088487, "success_rate.epoch.global": 0.6181921870544625, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5599999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958984375, "tokens_p.mean_in_band": 0.641633064516129, "tokens_rate.above_band": 0.9116809116809117, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08831908831908832 }, { "epoch": 0.3357712765957447, "grad_norm": 141.13826303276414, "learning_rate": 1.9779745547027115e-07, "loss": 0.8363, "step": 2020, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4175824175824176, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47560975609756095, "success_rate.epoch.env.logic": 0.4049382716049383, "success_rate.epoch.env.math": 0.8488612836438924, "success_rate.epoch.env.sat": 0.08552631578947369, "success_rate.epoch.env.science": 0.6601941747572816, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48586060049458873, "success_rate.epoch.global": 0.6183836084234491, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9948850896860987, "tokens_p.mean_in_band": 0.7233072916666666, "tokens_rate.above_band": 0.9674620390455532, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03253796095444685 }, { "epoch": 0.33660239361702127, "grad_norm": 219.97975682352043, "learning_rate": 1.977852844595513e-07, "loss": 0.7148, "step": 2025, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4175824175824176, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47560975609756095, "success_rate.epoch.env.logic": 0.4049382716049383, "success_rate.epoch.env.math": 0.8488612836438924, "success_rate.epoch.env.sat": 0.08496732026143791, "success_rate.epoch.env.science": 0.6609423161602818, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48587779557413097, "success_rate.epoch.global": 0.61875, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9985835694050992, "tokens_p.mean_in_band": 0.5753255208333333, "tokens_rate.above_band": 0.9216710182767625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0783289817232376 }, { "epoch": 0.33743351063829785, "grad_norm": 335.72442402858115, "learning_rate": 1.9777308188430876e-07, "loss": 0.9032, "step": 2030, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4175824175824176, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47560975609756095, "success_rate.epoch.env.logic": 0.4039408866995074, "success_rate.epoch.env.math": 0.8477366255144033, "success_rate.epoch.env.sat": 0.08496732026143791, "success_rate.epoch.env.science": 0.6609498680738787, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48568556910855615, "success_rate.epoch.global": 0.6186560816557981, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.47916666666666663, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9930803571428571, "tokens_p.mean_in_band": 0.703046875, "tokens_rate.above_band": 0.8484848484848485, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15151515151515152 }, { "epoch": 0.3382646276595745, "grad_norm": 180.70770770572085, "learning_rate": 1.9776084776620008e-07, "loss": 0.7969, "step": 2035, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4175824175824176, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47560975609756095, "success_rate.epoch.env.logic": 0.40294840294840295, "success_rate.epoch.env.math": 0.8480492813141683, "success_rate.epoch.env.sat": 0.08496732026143791, "success_rate.epoch.env.science": 0.6615452151009658, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48567788902453324, "success_rate.epoch.global": 0.6190206623266346, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0005196523053665, "tokens_p.mean_in_band": 0.5813210227272727, "tokens_rate.above_band": 0.9678127286027798, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03218727139722019 }, { "epoch": 0.3390957446808511, "grad_norm": 170.82886261002986, "learning_rate": 1.977485821269378e-07, "loss": 0.8812, "step": 2040, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4175824175824176, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4819277108433735, "success_rate.epoch.env.logic": 0.40294840294840295, "success_rate.epoch.env.math": 0.8486707566462167, "success_rate.epoch.env.sat": 0.08496732026143791, "success_rate.epoch.env.science": 0.6621384750219106, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48636267902442465, "success_rate.epoch.global": 0.619774011299435, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.988747553816047, "tokens_p.mean_in_band": 0.80810546875, "tokens_rate.above_band": 0.9012345679012346, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09876543209876543 }, { "epoch": 0.33992686170212766, "grad_norm": 133.08859243571013, "learning_rate": 1.977362849882905e-07, "loss": 0.9156, "step": 2045, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4175824175824176, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4819277108433735, "success_rate.epoch.env.logic": 0.40294840294840295, "success_rate.epoch.env.math": 0.8489795918367347, "success_rate.epoch.env.sat": 0.08441558441558442, "success_rate.epoch.env.science": 0.661706783369803, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4863013524510208, "success_rate.epoch.global": 0.6194640338504936, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.525, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.996549317617866, "tokens_p.mean_below_band": 7.729977369308472e-08, "tokens_p.mean_in_band": 0.6587171052631579, "tokens_rate.above_band": 0.912797281993205, "tokens_rate.below_band": 0.0011325028312570782, "tokens_rate.in_band": 0.08607021517553794 }, { "epoch": 0.34075797872340424, "grad_norm": 171.3487937304551, "learning_rate": 1.977239563720825e-07, "loss": 0.7762, "step": 2050, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4819277108433735, "success_rate.epoch.env.logic": 0.4019607843137255, "success_rate.epoch.env.math": 0.8492871690427699, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.6620026235242676, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4858042836083504, "success_rate.epoch.global": 0.6192621796676993, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9983705098493627, "tokens_p.mean_in_band": 0.5961277173913043, "tokens_rate.above_band": 0.9493949394939494, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050605060506050605 }, { "epoch": 0.3415890957446808, "grad_norm": 201.94683922299325, "learning_rate": 1.9771159630019412e-07, "loss": 0.7822, "step": 2055, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4019607843137255, "success_rate.epoch.env.math": 0.847870182555781, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.6621562636403318, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4862501182338903, "success_rate.epoch.global": 0.6194491287240023, "success_rate.window.env.ded": 0.5, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6555555555555556, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9934612771739131, "tokens_p.mean_below_band": 1.2014061212539673e-07, "tokens_p.mean_in_band": 0.5257874750996016, "tokens_rate.above_band": 0.8797513746115229, "tokens_rate.below_band": 0.0002390628735357399, "tokens_rate.in_band": 0.12000956251494142 }, { "epoch": 0.34242021276595747, "grad_norm": 124.6837014424957, "learning_rate": 1.9769920479456145e-07, "loss": 0.7904, "step": 2060, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4019607843137255, "success_rate.epoch.env.math": 0.8487903225806451, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.6617326948193296, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4862952610706051, "success_rate.epoch.global": 0.6195682646481637, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9869025735294118, "tokens_p.mean_below_band": 2.8405338525772095e-08, "tokens_p.mean_in_band": 0.6170099431818182, "tokens_rate.above_band": 0.8553459119496856, "tokens_rate.below_band": 0.006289308176100629, "tokens_rate.in_band": 0.13836477987421383 }, { "epoch": 0.34325132978723405, "grad_norm": 213.16549747677635, "learning_rate": 1.9768678187717638e-07, "loss": 0.7796, "step": 2065, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.41935483870967744, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.40097799511002447, "success_rate.epoch.env.math": 0.8487903225806451, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.661598609904431, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.486767487100624, "success_rate.epoch.global": 0.6194739787353106, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9985085756897838, "tokens_p.mean_in_band": 0.4836881868131868, "tokens_rate.above_band": 0.9364525139664804, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06354748603351955 }, { "epoch": 0.34408244680851063, "grad_norm": 116.84908200473514, "learning_rate": 1.9767432757008655e-07, "loss": 0.7543, "step": 2070, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.41935483870967744, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4024390243902439, "success_rate.epoch.env.math": 0.8487903225806451, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.6620390455531453, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48694034754870885, "success_rate.epoch.global": 0.6200055881531154, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5666666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9962548324742269, "tokens_p.mean_below_band": 1.9190338207408786e-10, "tokens_p.mean_in_band": 0.6834635416666667, "tokens_rate.above_band": 0.9615861214374225, "tokens_rate.below_band": 0.0012391573729863693, "tokens_rate.in_band": 0.03717472118959108 }, { "epoch": 0.3449135638297872, "grad_norm": 138.59798139190588, "learning_rate": 1.976618418953953e-07, "loss": 0.7698, "step": 2075, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.41935483870967744, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4024390243902439, "success_rate.epoch.env.math": 0.8490945674044266, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.6626297577854672, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4870217072810819, "success_rate.epoch.global": 0.6205742960691386, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9933035714285714, "tokens_p.mean_in_band": 0.6496803977272727, "tokens_rate.above_band": 0.8166666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18333333333333332 }, { "epoch": 0.34574468085106386, "grad_norm": 138.28187139862686, "learning_rate": 1.9764932487526163e-07, "loss": 0.6239, "step": 2080, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49411764705882355, "success_rate.epoch.env.logic": 0.4024390243902439, "success_rate.epoch.env.math": 0.8490945674044266, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.6626349892008639, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4871152331248982, "success_rate.epoch.global": 0.6203729473977178, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9909950071326676, "tokens_p.mean_in_band": 0.7669270833333334, "tokens_rate.above_band": 0.9033505154639175, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09664948453608248 }, { "epoch": 0.34657579787234044, "grad_norm": 207.4961467261952, "learning_rate": 1.9763677653190018e-07, "loss": 0.676, "step": 2085, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49411764705882355, "success_rate.epoch.env.logic": 0.4024390243902439, "success_rate.epoch.env.math": 0.845691382765531, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.6635071090047393, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4868851363217146, "success_rate.epoch.global": 0.6206609275201332, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9891939252336449, "tokens_p.mean_below_band": 8.149072527885437e-09, "tokens_p.mean_in_band": 0.69921875, "tokens_rate.above_band": 0.816793893129771, "tokens_rate.below_band": 0.007633587786259542, "tokens_rate.in_band": 0.17557251908396945 }, { "epoch": 0.347406914893617, "grad_norm": 162.35393397977262, "learning_rate": 1.9762419688758123e-07, "loss": 0.6411, "step": 2090, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49411764705882355, "success_rate.epoch.env.logic": 0.4053398058252427, "success_rate.epoch.env.math": 0.845691382765531, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.6638005159071367, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48717551707965967, "success_rate.epoch.global": 0.621119733924612, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9891561514195584, "tokens_p.mean_in_band": 0.6729835304054054, "tokens_rate.above_band": 0.8107416879795396, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18925831202046037 }, { "epoch": 0.3482380319148936, "grad_norm": 159.10187801087, "learning_rate": 1.9761158596463051e-07, "loss": 0.8077, "step": 2095, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49411764705882355, "success_rate.epoch.env.logic": 0.4053398058252427, "success_rate.epoch.env.math": 0.845691382765531, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.6646655231560892, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48725415410229167, "success_rate.epoch.global": 0.62174875484228, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967811158798283, "tokens_p.mean_in_band": 0.5761284722222222, "tokens_rate.above_band": 0.9628099173553719, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0371900826446281 }, { "epoch": 0.3490691489361702, "grad_norm": 200.00058197347562, "learning_rate": 1.9759894378542938e-07, "loss": 0.8468, "step": 2100, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49411764705882355, "success_rate.epoch.env.logic": 0.40481927710843374, "success_rate.epoch.env.math": 0.846307385229541, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.6645271715875053, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4872502561185296, "success_rate.epoch.global": 0.6216887417218543, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.6444444444444445, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9995993589743589, "tokens_p.mean_in_band": 0.44331498579545453, "tokens_rate.above_band": 0.9497716894977168, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0502283105022831 }, { "epoch": 0.34990026595744683, "grad_norm": 86.50107371896644, "learning_rate": 1.9758627037241456e-07, "loss": 0.6994, "step": 2105, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4883720930232558, "success_rate.epoch.env.logic": 0.40481927710843374, "success_rate.epoch.env.math": 0.8466135458167331, "success_rate.epoch.env.sat": 0.08280254777070063, "success_rate.epoch.env.science": 0.6652433817250214, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48677262258457576, "success_rate.epoch.global": 0.6219713656387665, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9886060767590619, "tokens_p.mean_in_band": 0.7242268041237113, "tokens_rate.above_band": 0.8286219081272085, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17137809187279152 }, { "epoch": 0.3507313829787234, "grad_norm": 72.33977276892092, "learning_rate": 1.9757356574807827e-07, "loss": 0.768, "step": 2110, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4148936170212766, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4883720930232558, "success_rate.epoch.env.logic": 0.40384615384615385, "success_rate.epoch.env.math": 0.8472222222222222, "success_rate.epoch.env.sat": 0.08227848101265822, "success_rate.epoch.env.science": 0.6656716417910448, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4867307813528658, "success_rate.epoch.global": 0.6221489420170376, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9960106382978723, "tokens_p.mean_in_band": 0.5833610372340425, "tokens_rate.above_band": 0.9375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0625 }, { "epoch": 0.3515625, "grad_norm": 122.91251426632387, "learning_rate": 1.975608299349681e-07, "loss": 0.656, "step": 2115, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4105263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4883720930232558, "success_rate.epoch.env.logic": 0.4028776978417266, "success_rate.epoch.env.math": 0.8481262327416174, "success_rate.epoch.env.sat": 0.08227848101265822, "success_rate.epoch.env.science": 0.6659574468085107, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48635387756201404, "success_rate.epoch.global": 0.6223622910386407, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9955778301886793, "tokens_p.mean_in_band": 0.67890625, "tokens_rate.above_band": 0.9695121951219512, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03048780487804878 }, { "epoch": 0.3523936170212766, "grad_norm": 145.75762946279514, "learning_rate": 1.9754806295568703e-07, "loss": 0.7459, "step": 2120, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4105263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4883720930232558, "success_rate.epoch.env.logic": 0.4028776978417266, "success_rate.epoch.env.math": 0.8481262327416174, "success_rate.epoch.env.sat": 0.08176100628930817, "success_rate.epoch.env.science": 0.6655333616659583, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48626828121056836, "success_rate.epoch.global": 0.6219545578976184, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.29166666666666663, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9917355371900827, "tokens_p.mean_in_band": 0.6981969762731481, "tokens_rate.above_band": 0.8175675675675675, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18243243243243243 }, { "epoch": 0.35322473404255317, "grad_norm": 166.58920362574537, "learning_rate": 1.9753526483289325e-07, "loss": 0.8161, "step": 2125, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4105263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4883720930232558, "success_rate.epoch.env.logic": 0.4019138755980861, "success_rate.epoch.env.math": 0.8487229862475442, "success_rate.epoch.env.sat": 0.08176100628930817, "success_rate.epoch.env.science": 0.6658184902459712, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48626083210532284, "success_rate.epoch.global": 0.6222343621961213, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994140625, "tokens_p.mean_in_band": 0.5240625, "tokens_rate.above_band": 0.9768732654949122, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02312673450508788 }, { "epoch": 0.3540558510638298, "grad_norm": 84.31554724364025, "learning_rate": 1.9752243558930039e-07, "loss": 0.6842, "step": 2130, "success_rate.epoch.env.abd": 0.3684210526315789, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4883720930232558, "success_rate.epoch.env.logic": 0.4019138755980861, "success_rate.epoch.env.math": 0.8490196078431372, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.665961049957663, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.486812516277611, "success_rate.epoch.global": 0.6224100327153762, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9970238095238095, "tokens_p.mean_in_band": 0.5981685450819673, "tokens_rate.above_band": 0.8610478359908884, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13895216400911162 }, { "epoch": 0.3548869680851064, "grad_norm": 162.95437907151003, "learning_rate": 1.975095752476771e-07, "loss": 0.8477, "step": 2135, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4827586206896552, "success_rate.epoch.env.logic": 0.4033412887828162, "success_rate.epoch.env.math": 0.8493150684931506, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.6658206429780034, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48477142008578417, "success_rate.epoch.global": 0.6222101252041372, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.42000000000000004, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9880216383307573, "tokens_p.mean_below_band": 2.676642907317728e-09, "tokens_p.mean_in_band": 0.684285121681416, "tokens_rate.above_band": 0.8490813648293963, "tokens_rate.below_band": 0.0026246719160104987, "tokens_rate.in_band": 0.14829396325459318 }, { "epoch": 0.355718085106383, "grad_norm": 169.48388464635582, "learning_rate": 1.9749668383084744e-07, "loss": 0.8139, "step": 2140, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.4166666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4827586206896552, "success_rate.epoch.env.logic": 0.4033412887828162, "success_rate.epoch.env.math": 0.8493150684931506, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.6663851351351351, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4848227375546143, "success_rate.epoch.global": 0.622620989668298, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9925595238095238, "tokens_p.mean_in_band": 0.6245659722222222, "tokens_rate.above_band": 0.875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.125 }, { "epoch": 0.35654920212765956, "grad_norm": 95.28194179703517, "learning_rate": 1.9748376136169046e-07, "loss": 0.734, "step": 2145, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.422680412371134, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.4033412887828162, "success_rate.epoch.env.math": 0.8493150684931506, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.6672283066554339, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4849473760825996, "success_rate.epoch.global": 0.6231687466087901, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9930527822151225, "tokens_p.mean_below_band": 1.4435499906539917e-07, "tokens_p.mean_in_band": 0.5313714378238342, "tokens_rate.above_band": 0.9065894279507604, "tokens_rate.below_band": 0.0002413709872073377, "tokens_rate.in_band": 0.09316920106203234 }, { "epoch": 0.35738031914893614, "grad_norm": 384.2611069554617, "learning_rate": 1.974708078631404e-07, "loss": 0.7637, "step": 2150, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.422680412371134, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.4023809523809524, "success_rate.epoch.env.math": 0.8499025341130604, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.6675073560319462, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48493884686392313, "success_rate.epoch.global": 0.6234434217650243, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9913015463917526, "tokens_p.mean_in_band": 0.6534090909090909, "tokens_rate.above_band": 0.8546255506607929, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14537444933920704 }, { "epoch": 0.3582114361702128, "grad_norm": 117.31698365846226, "learning_rate": 1.9745782335818653e-07, "loss": 0.6945, "step": 2155, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.4023809523809524, "success_rate.epoch.env.math": 0.8499025341130604, "success_rate.epoch.env.sat": 0.08074534161490683, "success_rate.epoch.env.science": 0.6677852348993288, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48545377747143076, "success_rate.epoch.global": 0.623615239124561, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.996629213483146, "tokens_p.mean_in_band": 0.559188179347826, "tokens_rate.above_band": 0.9063136456211812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09368635437881874 }, { "epoch": 0.35904255319148937, "grad_norm": 164.57010704848997, "learning_rate": 1.9744480786987316e-07, "loss": 0.77, "step": 2160, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.40380047505938244, "success_rate.epoch.env.math": 0.8504854368932039, "success_rate.epoch.env.sat": 0.08074534161490683, "success_rate.epoch.env.science": 0.6683417085427136, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48568640466251783, "success_rate.epoch.global": 0.6243257820927723, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930245535714286, "tokens_p.mean_in_band": 0.6948784722222222, "tokens_rate.above_band": 0.9613733905579399, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03862660944206009 }, { "epoch": 0.35987367021276595, "grad_norm": 166.71312425883977, "learning_rate": 1.9743176142129963e-07, "loss": 0.6699, "step": 2165, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.4056603773584906, "success_rate.epoch.env.math": 0.8504854368932039, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6683396068590548, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48580998398804404, "success_rate.epoch.global": 0.6242261103633917, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.99515625, "tokens_p.mean_in_band": 0.5082370923913043, "tokens_rate.above_band": 0.9456264775413712, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054373522458628844 }, { "epoch": 0.36070478723404253, "grad_norm": 304.0544051190945, "learning_rate": 1.9741868403562014e-07, "loss": 0.7596, "step": 2170, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.4047058823529412, "success_rate.epoch.env.math": 0.8507751937984496, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6681969949916527, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48534304268834316, "success_rate.epoch.global": 0.62395917271018, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.39285714285714285, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.997300392670157, "tokens_p.mean_in_band": 0.4777260638297872, "tokens_rate.above_band": 0.9420468557336621, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05795314426633785 }, { "epoch": 0.3615359042553192, "grad_norm": 113.96124949939217, "learning_rate": 1.9740557573604387e-07, "loss": 0.6863, "step": 2175, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.405152224824356, "success_rate.epoch.env.math": 0.851063829787234, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6680567139282736, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48539710608805414, "success_rate.epoch.global": 0.6239270386266095, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998046875, "tokens_p.mean_in_band": 0.5688802083333333, "tokens_rate.above_band": 0.9591280653950953, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04087193460490463 }, { "epoch": 0.36236702127659576, "grad_norm": 158.5559387645458, "learning_rate": 1.9739243654583485e-07, "loss": 0.7521, "step": 2180, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.40420560747663553, "success_rate.epoch.env.math": 0.8513513513513513, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6676372712146422, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48529905713376015, "success_rate.epoch.global": 0.6236616702355461, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9969254032258065, "tokens_p.mean_in_band": 0.5421875, "tokens_rate.above_band": 0.9393939393939394, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06060606060606061 }, { "epoch": 0.36319813829787234, "grad_norm": 104.46820216220819, "learning_rate": 1.9737926648831192e-07, "loss": 0.8485, "step": 2185, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.4046511627906977, "success_rate.epoch.env.math": 0.8497109826589595, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6679119966791199, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4852154127779553, "success_rate.epoch.global": 0.6236645299145299, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9964595921450151, "tokens_p.mean_in_band": 0.512668918918919, "tokens_rate.above_band": 0.9470672389127325, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05293276108726753 }, { "epoch": 0.3640292553191489, "grad_norm": 161.6217803672966, "learning_rate": 1.9736606558684872e-07, "loss": 0.5828, "step": 2190, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.4046511627906977, "success_rate.epoch.env.math": 0.85, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.668185584092792, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4852665586647474, "success_rate.epoch.global": 0.624, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961828175895765, "tokens_p.mean_below_band": 1.6916601452976465e-10, "tokens_p.mean_in_band": 0.7063210227272727, "tokens_rate.above_band": 0.9638932496075353, "tokens_rate.below_band": 0.0015698587127158557, "tokens_rate.in_band": 0.03453689167974882 }, { "epoch": 0.3648603723404255, "grad_norm": 278.57117942104793, "learning_rate": 1.973528338648736e-07, "loss": 0.7787, "step": 2195, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4772727272727273, "success_rate.epoch.env.logic": 0.4060324825986079, "success_rate.epoch.env.math": 0.85, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6681818181818182, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4853917908371962, "success_rate.epoch.global": 0.6241682193239286, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9908088235294118, "tokens_p.mean_in_band": 0.6768798828125, "tokens_rate.above_band": 0.8735177865612648, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12648221343873517 }, { "epoch": 0.36569148936170215, "grad_norm": 166.85460207699504, "learning_rate": 1.9733957134586957e-07, "loss": 0.9068, "step": 2200, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47191011235955055, "success_rate.epoch.env.logic": 0.4060324825986079, "success_rate.epoch.env.math": 0.8505747126436781, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6683168316831684, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48496880094918265, "success_rate.epoch.global": 0.6243358129649309, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9934572391767211, "tokens_p.mean_below_band": 7.729977369308472e-08, "tokens_p.mean_in_band": 0.6528111645299145, "tokens_rate.above_band": 0.8570559610705596, "tokens_rate.below_band": 0.0006082725060827251, "tokens_rate.in_band": 0.14233576642335766 }, { "epoch": 0.36652260638297873, "grad_norm": 450.9470110920482, "learning_rate": 1.9732627805337446e-07, "loss": 1.0085, "step": 2205, "success_rate.epoch.env.abd": 0.35, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47191011235955055, "success_rate.epoch.env.logic": 0.40415704387990764, "success_rate.epoch.env.math": 0.8505747126436781, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6681762041992589, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4847855222034909, "success_rate.epoch.global": 0.6239724211084593, "success_rate.window.env.logic": 0.25, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.4583333333333333, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9988251879699248, "tokens_p.mean_in_band": 0.5187230603448276, "tokens_rate.above_band": 0.9520925110132159, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04790748898678414 }, { "epoch": 0.3673537234042553, "grad_norm": 489.9581590847949, "learning_rate": 1.9731295401098052e-07, "loss": 0.9225, "step": 2210, "success_rate.epoch.env.abd": 0.38095238095238093, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47191011235955055, "success_rate.epoch.env.logic": 0.4055299539170507, "success_rate.epoch.env.math": 0.8508604206500956, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6678997122893547, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4877250228476761, "success_rate.epoch.global": 0.6241397564849126, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950892857142857, "tokens_p.mean_in_band": 0.6326069078947368, "tokens_rate.above_band": 0.9364548494983278, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06354515050167224 }, { "epoch": 0.3681848404255319, "grad_norm": 99.90563541538353, "learning_rate": 1.972995992423347e-07, "loss": 0.6576, "step": 2215, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47191011235955055, "success_rate.epoch.env.logic": 0.4068965517241379, "success_rate.epoch.env.math": 0.851145038167939, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6672131147540984, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4903707633866036, "success_rate.epoch.global": 0.6240760295670539, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9904384328358209, "tokens_p.mean_in_band": 0.5766741071428572, "tokens_rate.above_band": 0.8844884488448845, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11551155115511551 }, { "epoch": 0.3690159574468085, "grad_norm": 175.67127626612353, "learning_rate": 1.9728621377113841e-07, "loss": 0.8039, "step": 2220, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47191011235955055, "success_rate.epoch.env.logic": 0.4096109839816934, "success_rate.epoch.env.math": 0.8514285714285714, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.6673486088379705, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49065562335042723, "success_rate.epoch.global": 0.6245059288537549, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.8095238095238096, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.990909090909091, "tokens_p.mean_in_band": 0.6293150436046512, "tokens_rate.above_band": 0.8647798742138365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13522012578616352 }, { "epoch": 0.3698470744680851, "grad_norm": 97.7200196216115, "learning_rate": 1.9727279762114763e-07, "loss": 0.5809, "step": 2225, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42424242424242425, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.410958904109589, "success_rate.epoch.env.math": 0.8519924098671727, "success_rate.epoch.env.sat": 0.07975460122699386, "success_rate.epoch.env.science": 0.6677564364527994, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4913551632819091, "success_rate.epoch.global": 0.6250328687877991, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972112860892388, "tokens_p.mean_in_band": 0.6765252976190477, "tokens_rate.above_band": 0.9477611940298507, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05223880597014925 }, { "epoch": 0.3706781914893617, "grad_norm": 233.0538426510748, "learning_rate": 1.9725935081617274e-07, "loss": 0.7786, "step": 2230, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.41002277904328016, "success_rate.epoch.env.math": 0.8519924098671727, "success_rate.epoch.env.sat": 0.07926829268292683, "success_rate.epoch.env.science": 0.667890656874745, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49085237806092236, "success_rate.epoch.global": 0.6246719160104987, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.2, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9976927252985885, "tokens_p.mean_in_band": 0.6255686313291139, "tokens_rate.above_band": 0.921, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.079 }, { "epoch": 0.3715093085106383, "grad_norm": 126.02605568941038, "learning_rate": 1.9724587338007846e-07, "loss": 0.9944, "step": 2235, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.4090909090909091, "success_rate.epoch.env.math": 0.8519924098671727, "success_rate.epoch.env.sat": 0.07878787878787878, "success_rate.epoch.env.science": 0.6688364524003255, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4908099700316642, "success_rate.epoch.global": 0.625032731081435, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9999074074074074, "tokens_p.mean_in_band": 0.5775991586538461, "tokens_rate.above_band": 0.9284731774415406, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07152682255845942 }, { "epoch": 0.3723404255319149, "grad_norm": 101.04668029304527, "learning_rate": 1.97232365336784e-07, "loss": 0.6937, "step": 2240, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.41043083900226757, "success_rate.epoch.env.math": 0.8522727272727273, "success_rate.epoch.env.sat": 0.07878787878787878, "success_rate.epoch.env.science": 0.6691027202598457, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4879511683810369, "success_rate.epoch.global": 0.6252939639404234, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.994359756097561, "tokens_p.mean_in_band": 0.5496323529411765, "tokens_rate.above_band": 0.9601873536299765, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03981264637002342 }, { "epoch": 0.3731715425531915, "grad_norm": 259.93543710110924, "learning_rate": 1.972188267102628e-07, "loss": 0.6695, "step": 2245, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42574257425742573, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.41043083900226757, "success_rate.epoch.env.math": 0.8525519848771267, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6696356275303643, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48905155108486015, "success_rate.epoch.global": 0.6260099035704978, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955119680851063, "tokens_p.mean_in_band": 0.58466796875, "tokens_rate.above_band": 0.9494949494949495, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050505050505050504 }, { "epoch": 0.3740026595744681, "grad_norm": 170.37953314414946, "learning_rate": 1.972052575245426e-07, "loss": 0.6322, "step": 2250, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42574257425742573, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.4117647058823529, "success_rate.epoch.env.math": 0.8531073446327684, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6700363930448847, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48925973218942803, "success_rate.epoch.global": 0.6265938069216758, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971049046321526, "tokens_p.mean_in_band": 0.7662760416666666, "tokens_rate.above_band": 0.9839142091152815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0160857908847185 }, { "epoch": 0.3748337765957447, "grad_norm": 191.85400134815322, "learning_rate": 1.9719165780370542e-07, "loss": 0.7102, "step": 2255, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42574257425742573, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.4117647058823529, "success_rate.epoch.env.math": 0.8533834586466166, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6702986279257466, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4893086729980381, "success_rate.epoch.global": 0.6269160820992465, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976760284810127, "tokens_p.mean_in_band": 0.595703125, "tokens_rate.above_band": 0.948948948948949, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05105105105105105 }, { "epoch": 0.37566489361702127, "grad_norm": 125.06878636799533, "learning_rate": 1.9717802757188738e-07, "loss": 0.5586, "step": 2260, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.42574257425742573, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.4117647058823529, "success_rate.epoch.env.math": 0.8533834586466166, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.670555108608206, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48933198942371625, "success_rate.epoch.global": 0.6271713767176562, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9858870967741935, "tokens_p.mean_in_band": 0.71875, "tokens_rate.above_band": 0.8211920529801324, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17880794701986755 }, { "epoch": 0.37649601063829785, "grad_norm": 352.14754221086093, "learning_rate": 1.9716436685327884e-07, "loss": 0.598, "step": 2265, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.4215686274509804, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.4117647058823529, "success_rate.epoch.env.math": 0.8533834586466166, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6709470304975923, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4889881689767108, "success_rate.epoch.global": 0.6273291925465838, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994589552238806, "tokens_p.mean_in_band": 0.6861979166666666, "tokens_rate.above_band": 0.925414364640884, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07458563535911603 }, { "epoch": 0.3773271276595745, "grad_norm": 201.4983490720875, "learning_rate": 1.971506756721242e-07, "loss": 0.6947, "step": 2270, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.4174757281553398, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.41309255079006774, "success_rate.epoch.env.math": 0.8536585365853658, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.671205446535843, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48878529984844493, "success_rate.epoch.global": 0.6275826446280992, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9956955922865014, "tokens_p.mean_in_band": 0.6378173828125, "tokens_rate.above_band": 0.9577836411609498, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04221635883905013 }, { "epoch": 0.3781582446808511, "grad_norm": 83.74577021972645, "learning_rate": 1.9713695405272197e-07, "loss": 0.6504, "step": 2275, "success_rate.epoch.env.abd": 0.4090909090909091, "success_rate.epoch.env.agentgym:alfworld": 0.4174757281553398, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.4144144144144144, "success_rate.epoch.env.math": 0.8542056074766355, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6709316273490604, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.48893031033288425, "success_rate.epoch.global": 0.6277391080175303, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.7999999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9937770562770563, "tokens_p.mean_in_band": 0.56536865234375, "tokens_rate.above_band": 0.9352226720647774, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06477732793522267 }, { "epoch": 0.37898936170212766, "grad_norm": 242.0518233652568, "learning_rate": 1.9712320201942466e-07, "loss": 0.6159, "step": 2280, "success_rate.epoch.env.abd": 0.43478260869565216, "success_rate.epoch.env.agentgym:alfworld": 0.4174757281553398, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.4157303370786517, "success_rate.epoch.env.math": 0.8544776119402985, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6713258785942492, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49144611742177813, "success_rate.epoch.global": 0.6283140283140283, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.992468134414832, "tokens_p.mean_in_band": 0.6107474662162162, "tokens_rate.above_band": 0.9588888888888889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04111111111111111 }, { "epoch": 0.37982047872340424, "grad_norm": 171.3749426959497, "learning_rate": 1.9710941959663876e-07, "loss": 0.8784, "step": 2285, "success_rate.epoch.env.abd": 0.43478260869565216, "success_rate.epoch.env.agentgym:alfworld": 0.4174757281553398, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.4170403587443946, "success_rate.epoch.env.math": 0.8550185873605948, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.671451355661882, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4916257968902938, "success_rate.epoch.global": 0.6287255909558068, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.575, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9965523785425101, "tokens_p.mean_in_band": 0.4839618389423077, "tokens_rate.above_band": 0.95, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05 }, { "epoch": 0.3806515957446808, "grad_norm": 216.32496356539426, "learning_rate": 1.970956068088247e-07, "loss": 0.8025, "step": 2290, "success_rate.epoch.env.abd": 0.43478260869565216, "success_rate.epoch.env.agentgym:alfworld": 0.4174757281553398, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.41834451901565994, "success_rate.epoch.env.math": 0.8558225508317929, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.671047391477499, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4917807204864829, "success_rate.epoch.global": 0.6288791997948192, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9883052147239264, "tokens_p.mean_below_band": 1.0800249583553523e-11, "tokens_p.mean_in_band": 0.6879111842105263, "tokens_rate.above_band": 0.8907103825136612, "tokens_rate.below_band": 0.00546448087431694, "tokens_rate.in_band": 0.10382513661202186 }, { "epoch": 0.38148271276595747, "grad_norm": 113.91064570835519, "learning_rate": 1.9708176368049677e-07, "loss": 0.6917, "step": 2295, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.41346153846153844, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.41834451901565994, "success_rate.epoch.env.math": 0.8560885608856088, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6713091922005571, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4936047519156423, "success_rate.epoch.global": 0.6290983606557377, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9960133744855967, "tokens_p.mean_in_band": 0.6800944010416666, "tokens_rate.above_band": 0.9681274900398407, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03187250996015936 }, { "epoch": 0.38231382978723405, "grad_norm": 320.1123134365402, "learning_rate": 1.970678902362232e-07, "loss": 0.7575, "step": 2300, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4095238095238095, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.3, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.41964285714285715, "success_rate.epoch.env.math": 0.8550458715596331, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6714342471195868, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4932813859867808, "success_rate.epoch.global": 0.6291847687196525, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.995844414893617, "tokens_p.mean_in_band": 0.6862571022727273, "tokens_rate.above_band": 0.9447236180904522, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05527638190954774 }, { "epoch": 0.38314494680851063, "grad_norm": 74.54539615641788, "learning_rate": 1.9705398650062587e-07, "loss": 0.5729, "step": 2305, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4095238095238095, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4777777777777778, "success_rate.epoch.env.logic": 0.4177777777777778, "success_rate.epoch.env.math": 0.8555758683729433, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6720856463124505, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49073989411117774, "success_rate.epoch.global": 0.6293652816721896, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9957757654966393, "tokens_p.mean_in_band": 0.542016006097561, "tokens_rate.above_band": 0.9422941590429276, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.057705840957072485 }, { "epoch": 0.3839760638297872, "grad_norm": 148.1718887318526, "learning_rate": 1.9704005249838053e-07, "loss": 0.7807, "step": 2310, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4095238095238095, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4725274725274725, "success_rate.epoch.env.logic": 0.4177777777777778, "success_rate.epoch.env.math": 0.8558394160583942, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6725978647686833, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49033311782857586, "success_rate.epoch.global": 0.62970498474059, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9869897959183673, "tokens_p.mean_in_band": 0.22960464015151516, "tokens_rate.above_band": 0.4260869565217391, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.5739130434782609 }, { "epoch": 0.38480718085106386, "grad_norm": 145.0841983963652, "learning_rate": 1.9702608825421662e-07, "loss": 0.8626, "step": 2315, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4725274725274725, "success_rate.epoch.env.logic": 0.4177777777777778, "success_rate.epoch.env.math": 0.8558394160583942, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.6732438831886346, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.490040625669906, "success_rate.epoch.global": 0.6300152361604876, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9941611842105263, "tokens_p.mean_in_band": 0.6964518229166666, "tokens_rate.above_band": 0.9405940594059405, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0594059405940594 }, { "epoch": 0.38563829787234044, "grad_norm": 213.57600364462067, "learning_rate": 1.970120937929173e-07, "loss": 0.9426, "step": 2320, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4673913043478261, "success_rate.epoch.env.logic": 0.41685144124168516, "success_rate.epoch.env.math": 0.8558394160583942, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.6734935013784955, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4900106368842669, "success_rate.epoch.global": 0.6300050684237202, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958898747390397, "tokens_p.mean_in_band": 0.5388842680608364, "tokens_rate.above_band": 0.9425010931351115, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.057498906864888503 }, { "epoch": 0.386469414893617, "grad_norm": 339.14442542069975, "learning_rate": 1.9699806913931925e-07, "loss": 0.8085, "step": 2325, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4673913043478261, "success_rate.epoch.env.logic": 0.4175824175824176, "success_rate.epoch.env.math": 0.8561020036429873, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.6740070782540307, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49014764968434493, "success_rate.epoch.global": 0.6303413400758533, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945667220376523, "tokens_p.mean_in_band": 0.6427859042553191, "tokens_rate.above_band": 0.9505263157894737, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049473684210526316 }, { "epoch": 0.3873005319148936, "grad_norm": 164.1906112757732, "learning_rate": 1.9698401431831282e-07, "loss": 0.6465, "step": 2330, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4673913043478261, "success_rate.epoch.env.logic": 0.41885964912280704, "success_rate.epoch.env.math": 0.8561020036429873, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.6747744213417026, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49033352010507786, "success_rate.epoch.global": 0.6309944472488642, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9895287958115183, "tokens_p.mean_in_band": 0.7799479166666666, "tokens_rate.above_band": 0.9138755980861244, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0861244019138756 }, { "epoch": 0.3881316489361702, "grad_norm": 136.1435542467951, "learning_rate": 1.9696992935484188e-07, "loss": 0.7365, "step": 2335, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4673913043478261, "success_rate.epoch.env.logic": 0.41885964912280704, "success_rate.epoch.env.math": 0.8563636363636363, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.675146771037182, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.4903911548701804, "success_rate.epoch.global": 0.6313932980599647, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9829931972789115, "tokens_p.mean_in_band": 0.6982717803030303, "tokens_rate.above_band": 0.8166666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18333333333333332 }, { "epoch": 0.38896276595744683, "grad_norm": 222.28414961351368, "learning_rate": 1.9695581427390374e-07, "loss": 0.9462, "step": 2340, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4673913043478261, "success_rate.epoch.env.logic": 0.4201312910284464, "success_rate.epoch.env.math": 0.8566243194192378, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.6755173760249903, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.49056414850191227, "success_rate.epoch.global": 0.6318833291425697, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9887820512820513, "tokens_p.mean_in_band": 0.5688244047619048, "tokens_rate.above_band": 0.9176470588235294, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08235294117647059 }, { "epoch": 0.3897938829787234, "grad_norm": 182.55120314801775, "learning_rate": 1.969416691005493e-07, "loss": 0.7169, "step": 2345, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4673913043478261, "success_rate.epoch.env.logic": 0.42139737991266374, "success_rate.epoch.env.math": 0.8568840579710145, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.6753804135778385, "success_rate.epoch.env.webshop": 0.3333333333333333, "success_rate.epoch.env_macro_mean": 0.47553889380392816, "success_rate.epoch.global": 0.6318432948267202, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9913740920096852, "tokens_p.mean_in_band": 0.49870445979899497, "tokens_rate.above_band": 0.6748366013071896, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.32516339869281047 }, { "epoch": 0.390625, "grad_norm": 136.82508102092632, "learning_rate": 1.969274938598827e-07, "loss": 0.8511, "step": 2350, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.41956521739130437, "success_rate.epoch.env.math": 0.8568840579710145, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.6754966887417219, "success_rate.epoch.env.webshop": 0.3333333333333333, "success_rate.epoch.env_macro_mean": 0.4764130949448388, "success_rate.epoch.global": 0.6318295739348371, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9962775037936267, "tokens_p.mean_in_band": 0.6385143649193549, "tokens_rate.above_band": 0.9770200148257969, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022979985174203115 }, { "epoch": 0.3914561170212766, "grad_norm": 117.64524890024998, "learning_rate": 1.9691328857706155e-07, "loss": 0.6044, "step": 2355, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.41865509761388287, "success_rate.epoch.env.math": 0.8568840579710145, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.675990675990676, "success_rate.epoch.env.webshop": 0.3333333333333333, "success_rate.epoch.env_macro_mean": 0.47637526471497815, "success_rate.epoch.global": 0.6320660330165082, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945673076923077, "tokens_p.mean_in_band": 0.5319393382352942, "tokens_rate.above_band": 0.9502923976608187, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049707602339181284 }, { "epoch": 0.39228723404255317, "grad_norm": 180.84536452626043, "learning_rate": 1.9689905327729675e-07, "loss": 0.7345, "step": 2360, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.41865509761388287, "success_rate.epoch.env.math": 0.8568840579710145, "success_rate.epoch.env.sat": 0.08875739644970414, "success_rate.epoch.env.science": 0.6761055081458495, "success_rate.epoch.env.webshop": 0.3333333333333333, "success_rate.epoch.env_macro_mean": 0.47628907101710904, "success_rate.epoch.global": 0.6318681318681318, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.4375, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9929187192118226, "tokens_p.mean_in_band": 0.6389973958333334, "tokens_rate.above_band": 0.8087649402390438, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19123505976095617 }, { "epoch": 0.3931183510638298, "grad_norm": 179.73182958390456, "learning_rate": 1.9688478798585246e-07, "loss": 0.8133, "step": 2365, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.4199134199134199, "success_rate.epoch.env.math": 0.8571428571428571, "success_rate.epoch.env.sat": 0.08875739644970414, "success_rate.epoch.env.science": 0.6767324816105303, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4689082311628115, "success_rate.epoch.global": 0.6323529411764706, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.989227207977208, "tokens_p.mean_in_band": 0.582958984375, "tokens_rate.above_band": 0.814385150812065, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18561484918793503 }, { "epoch": 0.3939494680851064, "grad_norm": 145.6081595339324, "learning_rate": 1.9687049272804607e-07, "loss": 0.8447, "step": 2370, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.4199134199134199, "success_rate.epoch.env.math": 0.8571428571428571, "success_rate.epoch.env.sat": 0.08823529411764706, "success_rate.epoch.env.science": 0.6768457672980286, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4688710660133062, "success_rate.epoch.global": 0.6323126711476226, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9972426470588235, "tokens_p.mean_in_band": 0.595, "tokens_rate.above_band": 0.9373433583959899, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06265664160401002 }, { "epoch": 0.394780585106383, "grad_norm": 178.85416131259834, "learning_rate": 1.9685616752924818e-07, "loss": 0.7202, "step": 2375, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.4190064794816415, "success_rate.epoch.env.math": 0.8571428571428571, "success_rate.epoch.env.sat": 0.08823529411764706, "success_rate.epoch.env.science": 0.677207867335133, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4688215350683358, "success_rate.epoch.global": 0.632455268389662, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9904442148760331, "tokens_p.mean_in_band": 0.6848958333333334, "tokens_rate.above_band": 0.88, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12 }, { "epoch": 0.39561170212765956, "grad_norm": 137.48257854830314, "learning_rate": 1.9684181241488252e-07, "loss": 0.8424, "step": 2380, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.4190064794816415, "success_rate.epoch.env.math": 0.8571428571428571, "success_rate.epoch.env.sat": 0.08823529411764706, "success_rate.epoch.env.science": 0.6775682954982686, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4688543012649845, "success_rate.epoch.global": 0.6327543424317618, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.995958429561201, "tokens_p.mean_in_band": 0.6864346590909091, "tokens_rate.above_band": 0.9516483516483516, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04835164835164835 }, { "epoch": 0.39644281914893614, "grad_norm": 178.4357826812623, "learning_rate": 1.9682742741042585e-07, "loss": 0.7598, "step": 2385, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.41935483870967744, "success_rate.epoch.env.math": 0.8576576576576577, "success_rate.epoch.env.sat": 0.0872093023255814, "success_rate.epoch.env.science": 0.6779400461183704, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4688732938623366, "success_rate.epoch.global": 0.6328299083931667, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9992897727272727, "tokens_p.mean_in_band": 0.6840359669811321, "tokens_rate.above_band": 0.9372781065088758, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06272189349112427 }, { "epoch": 0.3972739361702128, "grad_norm": 133.1453574293202, "learning_rate": 1.9681301254140808e-07, "loss": 0.5345, "step": 2390, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47368421052631576, "success_rate.epoch.env.logic": 0.41755888650963596, "success_rate.epoch.env.math": 0.8579136690647482, "success_rate.epoch.env.sat": 0.0872093023255814, "success_rate.epoch.env.science": 0.6783109404990403, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.46830890839494704, "success_rate.epoch.global": 0.6327236777063767, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9940575132978723, "tokens_p.mean_in_band": 0.6707732371794872, "tokens_rate.above_band": 0.9506953223767383, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0493046776232617 }, { "epoch": 0.39810505319148937, "grad_norm": 103.41521914656765, "learning_rate": 1.9679856783341205e-07, "loss": 0.9273, "step": 2395, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.41755888650963596, "success_rate.epoch.env.math": 0.8581687612208259, "success_rate.epoch.env.sat": 0.0872093023255814, "success_rate.epoch.env.science": 0.678544061302682, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.46790472679798384, "success_rate.epoch.global": 0.6328645447816432, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9840158045977011, "tokens_p.mean_in_band": 0.5209029103053435, "tokens_rate.above_band": 0.7265135699373695, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.27348643006263046 }, { "epoch": 0.39893617021276595, "grad_norm": 202.55159073370515, "learning_rate": 1.9678409331207356e-07, "loss": 0.8917, "step": 2400, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.417910447761194, "success_rate.epoch.env.math": 0.8586762075134168, "success_rate.epoch.env.sat": 0.08670520231213873, "success_rate.epoch.env.science": 0.6786671773266948, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.46794818348477646, "success_rate.epoch.global": 0.6329145109632914, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9931579535299374, "tokens_p.mean_in_band": 0.7709558823529412, "tokens_rate.above_band": 0.9294019933554817, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07059800664451828 }, { "epoch": 0.39976728723404253, "grad_norm": 144.3813228811916, "learning_rate": 1.9676958900308137e-07, "loss": 0.7197, "step": 2405, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.4182590233545648, "success_rate.epoch.env.math": 0.8586762075134168, "success_rate.epoch.env.sat": 0.08670520231213873, "success_rate.epoch.env.science": 0.6794038975926634, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.46804684674471647, "success_rate.epoch.global": 0.6333907056798623, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969470827679783, "tokens_p.mean_in_band": 0.6713541666666667, "tokens_rate.above_band": 0.9608865710560626, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03911342894393742 }, { "epoch": 0.4005984042553192, "grad_norm": 303.8825594122185, "learning_rate": 1.9675505493217704e-07, "loss": 0.781, "step": 2410, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.4194915254237288, "success_rate.epoch.env.math": 0.8594306049822064, "success_rate.epoch.env.sat": 0.08620689655172414, "success_rate.epoch.env.science": 0.6796338672768879, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4682030797866949, "success_rate.epoch.global": 0.6337993622761835, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9914259927797834, "tokens_p.mean_in_band": 0.5450994318181818, "tokens_rate.above_band": 0.8629283489096573, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13707165109034267 }, { "epoch": 0.40142952127659576, "grad_norm": 161.17103152176114, "learning_rate": 1.9674049112515505e-07, "loss": 0.8039, "step": 2415, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4639175257731959, "success_rate.epoch.env.logic": 0.4194915254237288, "success_rate.epoch.env.math": 0.8599290780141844, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.6796190476190476, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.46776294963304904, "success_rate.epoch.global": 0.633692458374143, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9890287769784173, "tokens_p.mean_below_band": 1.8189894035458565e-09, "tokens_p.mean_in_band": 0.6151167168674698, "tokens_rate.above_band": 0.8062645011600929, "tokens_rate.below_band": 0.001160092807424594, "tokens_rate.in_band": 0.1925754060324826 }, { "epoch": 0.40226063829787234, "grad_norm": 494.3951137688073, "learning_rate": 1.9672589760786253e-07, "loss": 0.7364, "step": 2420, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4056603773584906, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4639175257731959, "success_rate.epoch.env.logic": 0.42071881606765327, "success_rate.epoch.env.math": 0.8601769911504424, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.6804711246200608, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.46797452061315764, "success_rate.epoch.global": 0.6344979232836551, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9900147928994083, "tokens_p.mean_in_band": 0.7309027777777778, "tokens_rate.above_band": 0.949438202247191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05056179775280899 }, { "epoch": 0.4030917553191489, "grad_norm": 93.89993135424115, "learning_rate": 1.9671127440619942e-07, "loss": 0.7083, "step": 2425, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.411214953271028, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4639175257731959, "success_rate.epoch.env.logic": 0.42105263157894735, "success_rate.epoch.env.math": 0.859402460456942, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.6808349146110056, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.46847248886054643, "success_rate.epoch.global": 0.6349012917377529, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.996326754385965, "tokens_p.mean_in_band": 0.762542724609375, "tokens_rate.above_band": 0.946843853820598, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053156146179401995 }, { "epoch": 0.4039228723404255, "grad_norm": 161.3842075736701, "learning_rate": 1.966966215461183e-07, "loss": 0.8298, "step": 2430, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.411214953271028, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42105263157894735, "success_rate.epoch.env.math": 0.8601398601398601, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.6808349146110056, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4686057923656543, "success_rate.epoch.global": 0.6351022395326192, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9896760633036598, "tokens_p.mean_in_band": 0.6954210069444444, "tokens_rate.above_band": 0.8753246753246753, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12467532467532468 }, { "epoch": 0.40475398936170215, "grad_norm": 165.3783824669458, "learning_rate": 1.9668193905362442e-07, "loss": 0.7713, "step": 2435, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.411214953271028, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42016806722689076, "success_rate.epoch.env.math": 0.8586387434554974, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.6813020439061317, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4684313785709913, "success_rate.epoch.global": 0.635171241195045, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9954028925619834, "tokens_p.mean_in_band": 0.5255126953125, "tokens_rate.above_band": 0.9497645211930926, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05023547880690738 }, { "epoch": 0.40558510638297873, "grad_norm": 120.33365203894601, "learning_rate": 1.9666722695477561e-07, "loss": 0.7, "step": 2440, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.411214953271028, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.4205020920502092, "success_rate.epoch.env.math": 0.8588850174216028, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.6810117025292564, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4684577383357684, "success_rate.epoch.global": 0.6350860188999273, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6904761904761904, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9931102362204725, "tokens_p.mean_in_band": 0.6144649621212122, "tokens_rate.above_band": 0.8850174216027874, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11498257839721254 }, { "epoch": 0.4064162234042553, "grad_norm": 167.69555133464618, "learning_rate": 1.9665248527568227e-07, "loss": 0.6946, "step": 2445, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.411214953271028, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42083333333333334, "success_rate.epoch.env.math": 0.8588850174216028, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.6816126601356444, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4685424836893604, "success_rate.epoch.global": 0.6354620222544751, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9927474402730375, "tokens_p.mean_in_band": 0.466734446347032, "tokens_rate.above_band": 0.8005464480874317, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1994535519125683 }, { "epoch": 0.4072473404255319, "grad_norm": 121.39749774839373, "learning_rate": 1.9663771404250722e-07, "loss": 0.6475, "step": 2450, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.411214953271028, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42203742203742206, "success_rate.epoch.env.math": 0.8588850174216028, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.6817155756207675, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4686170283441058, "success_rate.epoch.global": 0.6355072463768116, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9974385245901639, "tokens_p.mean_in_band": 0.7049725506756757, "tokens_rate.above_band": 0.9202586206896551, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07974137931034483 }, { "epoch": 0.4080784574468085, "grad_norm": 226.74189587465628, "learning_rate": 1.966229132814658e-07, "loss": 0.8378, "step": 2455, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.411214953271028, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42203742203742206, "success_rate.epoch.env.math": 0.8596187175043327, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.6816816816816816, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4686806470844372, "success_rate.epoch.global": 0.6358158592431912, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9889481707317073, "tokens_p.mean_in_band": 0.6402994791666666, "tokens_rate.above_band": 0.9318181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06818181818181818 }, { "epoch": 0.4089095744680851, "grad_norm": 155.17495827800832, "learning_rate": 1.9660808301882575e-07, "loss": 0.8739, "step": 2460, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.411214953271028, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42203742203742206, "success_rate.epoch.env.math": 0.8581314878892734, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.6812734082397004, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4685083286247062, "success_rate.epoch.global": 0.6354667949951877, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.42857142857142855, "success_rate.window.env_macro_mean": 0.10714285714285714, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9965329835082459, "tokens_p.mean_in_band": 0.5718587239583334, "tokens_rate.above_band": 0.9328671328671329, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06713286713286713 }, { "epoch": 0.4097406914893617, "grad_norm": 112.72425591902896, "learning_rate": 1.965932232809071e-07, "loss": 0.6732, "step": 2465, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.411214953271028, "success_rate.epoch.env.agentgym:sciworld": 0.25, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42203742203742206, "success_rate.epoch.env.math": 0.8586206896551725, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.6814953271028037, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4685729759546154, "success_rate.epoch.global": 0.6358395387941388, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.43333333333333335, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9906914893617021, "tokens_p.mean_below_band": 4.760636329592671e-13, "tokens_p.mean_in_band": 0.61181640625, "tokens_rate.above_band": 0.8703703703703703, "tokens_rate.below_band": 0.006172839506172839, "tokens_rate.in_band": 0.12345679012345678 }, { "epoch": 0.4105718085106383, "grad_norm": 120.93056771817326, "learning_rate": 1.965783340940823e-07, "loss": 0.5889, "step": 2470, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42203742203742206, "success_rate.epoch.env.math": 0.8588640275387264, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.6822081312942931, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48195012106474483, "success_rate.epoch.global": 0.636385426653883, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989961961115807, "tokens_p.mean_in_band": 0.57529296875, "tokens_rate.above_band": 0.9833748960931006, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01662510390689942 }, { "epoch": 0.4114029255319149, "grad_norm": 123.10377028336032, "learning_rate": 1.9656341548477602e-07, "loss": 0.6356, "step": 2475, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42203742203742206, "success_rate.epoch.env.math": 0.8588640275387264, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.683153588694682, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4820360717375074, "success_rate.epoch.global": 0.6370813397129187, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9889914772727273, "tokens_p.mean_in_band": 0.75453125, "tokens_rate.above_band": 0.8756218905472637, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12437810945273632 }, { "epoch": 0.4122340425531915, "grad_norm": 92.46562463980615, "learning_rate": 1.9654846747946509e-07, "loss": 0.6101, "step": 2480, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.4244306418219462, "success_rate.epoch.env.math": 0.8591065292096219, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.6831352154531947, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48274384952956634, "success_rate.epoch.global": 0.6374492476713638, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.4, "success_rate.window.env_macro_mean": 0.85, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9932278067885117, "tokens_p.mean_in_band": 0.6555059523809523, "tokens_rate.above_band": 0.948019801980198, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05198019801980198 }, { "epoch": 0.4130651595744681, "grad_norm": 196.18699160321822, "learning_rate": 1.9653349010467856e-07, "loss": 0.5627, "step": 2485, "success_rate.epoch.env.abd": 0.48, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.4244306418219462, "success_rate.epoch.env.math": 0.8591065292096219, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.683839881393625, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4847776070393024, "success_rate.epoch.global": 0.6380543633762518, "success_rate.window.env.abd": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9776011560693642, "tokens_p.mean_in_band": 0.75390625, "tokens_rate.above_band": 0.8398058252427184, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16019417475728157 }, { "epoch": 0.4138962765957447, "grad_norm": 144.46116431404732, "learning_rate": 1.9651848338699765e-07, "loss": 0.7585, "step": 2490, "success_rate.epoch.env.abd": 0.48, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.4256198347107438, "success_rate.epoch.env.math": 0.8598290598290599, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.6843079200592154, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4849939490551048, "success_rate.epoch.global": 0.6387434554973822, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9893382352941177, "tokens_p.mean_in_band": 0.6808712121212122, "tokens_rate.above_band": 0.8854166666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11458333333333333 }, { "epoch": 0.41472739361702127, "grad_norm": 135.361212760332, "learning_rate": 1.9650344735305554e-07, "loss": 0.7252, "step": 2495, "success_rate.epoch.env.abd": 0.48, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.4247422680412371, "success_rate.epoch.env.math": 0.8600682593856656, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.6845216106390839, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48495534209755636, "success_rate.epoch.global": 0.6388690900451414, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9995639534883721, "tokens_p.mean_in_band": 0.5611979166666666, "tokens_rate.above_band": 0.9388646288209607, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0611353711790393 }, { "epoch": 0.41555851063829785, "grad_norm": 105.40935994278132, "learning_rate": 1.9648838202953754e-07, "loss": 0.7333, "step": 2500, "success_rate.epoch.env.abd": 0.48, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.42505133470225875, "success_rate.epoch.env.math": 0.8603066439522998, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.6851032448377581, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4850579862272228, "success_rate.epoch.global": 0.6393170500355703, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994989067055393, "tokens_p.mean_in_band": 0.6082261029411765, "tokens_rate.above_band": 0.9758179231863442, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02418207681365576 }, { "epoch": 0.4163896276595745, "grad_norm": 99.93998301250781, "learning_rate": 1.9647328744318092e-07, "loss": 0.8724, "step": 2505, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46, "success_rate.epoch.env.logic": 0.42505133470225875, "success_rate.epoch.env.math": 0.8603066439522998, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.6853146853146853, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4864729840299921, "success_rate.epoch.global": 0.6394412878787878, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.3666666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9899129746835443, "tokens_p.mean_in_band": 0.5807904411764706, "tokens_rate.above_band": 0.8229166666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17708333333333334 }, { "epoch": 0.4172207446808511, "grad_norm": 122.93054612066635, "learning_rate": 1.9645816362077486e-07, "loss": 0.6416, "step": 2510, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46, "success_rate.epoch.env.logic": 0.42505133470225875, "success_rate.epoch.env.math": 0.8607809847198642, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.6858927259368112, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4865686550654185, "success_rate.epoch.global": 0.6400378161191208, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.98675, "tokens_p.mean_in_band": 0.75048828125, "tokens_rate.above_band": 0.8389261744966443, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1610738255033557 }, { "epoch": 0.41805186170212766, "grad_norm": 115.9545692365525, "learning_rate": 1.9644301058916043e-07, "loss": 0.606, "step": 2515, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.4262295081967213, "success_rate.epoch.env.math": 0.8610169491525423, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.6858715596330275, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4862350801928059, "success_rate.epoch.global": 0.6399244926852289, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.52, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.986927599009901, "tokens_p.mean_below_band": 8.003553375601768e-11, "tokens_p.mean_in_band": 0.6374438622754491, "tokens_rate.above_band": 0.8278688524590164, "tokens_rate.below_band": 0.0010245901639344263, "tokens_rate.in_band": 0.1711065573770492 }, { "epoch": 0.41888297872340424, "grad_norm": 111.80414903331769, "learning_rate": 1.9642782837523057e-07, "loss": 0.8307, "step": 2520, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.4262295081967213, "success_rate.epoch.env.math": 0.8612521150592216, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.6860805860805861, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4862754613159185, "success_rate.epoch.global": 0.6401979264844486, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9907608695652174, "tokens_p.mean_in_band": 0.6021535773026315, "tokens_rate.above_band": 0.8194774346793349, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18052256532066507 }, { "epoch": 0.4197140957446808, "grad_norm": 111.2901167045314, "learning_rate": 1.9641261700592996e-07, "loss": 0.6871, "step": 2525, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.4262295081967213, "success_rate.epoch.env.math": 0.8614864864864865, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.6864251738016831, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4846445922822678, "success_rate.epoch.global": 0.6403859731701577, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9832554517133957, "tokens_p.mean_in_band": 0.5588900862068965, "tokens_rate.above_band": 0.6888412017167382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.3111587982832618 }, { "epoch": 0.42054521276595747, "grad_norm": 206.2463598646136, "learning_rate": 1.9639737650825513e-07, "loss": 0.625, "step": 2530, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.42276422764227645, "success_rate.epoch.env.math": 0.8617200674536256, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.6867690058479532, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48438205886944635, "success_rate.epoch.global": 0.6401221517500587, "success_rate.window.env.logic": 0.2, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9980053191489362, "tokens_p.mean_in_band": 0.5351004464285715, "tokens_rate.above_band": 0.9622844827586207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03771551724137931 }, { "epoch": 0.42137632978723405, "grad_norm": 125.83773121134948, "learning_rate": 1.9638210690925411e-07, "loss": 0.6364, "step": 2535, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.42276422764227645, "success_rate.epoch.env.math": 0.8617200674536256, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.6865889212827988, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48436568754534143, "success_rate.epoch.global": 0.6400937866354045, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9952536962365591, "tokens_p.mean_in_band": 0.6362060546875, "tokens_rate.above_band": 0.9489795918367347, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05102040816326531 }, { "epoch": 0.42220744680851063, "grad_norm": 114.1143029733693, "learning_rate": 1.9636680823602678e-07, "loss": 0.7121, "step": 2540, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.4251012145748988, "success_rate.epoch.env.math": 0.8617200674536256, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.6870451237263464, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48461961385226604, "success_rate.epoch.global": 0.6405993912432686, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968184389140271, "tokens_p.mean_in_band": 0.6865234375, "tokens_rate.above_band": 0.9910313901345291, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008968609865470852 }, { "epoch": 0.4230385638297872, "grad_norm": 157.6008947199477, "learning_rate": 1.963514805157245e-07, "loss": 0.712, "step": 2545, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.42655935613682094, "success_rate.epoch.env.math": 0.8617200674536256, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.6877269426289034, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4848141557126732, "success_rate.epoch.global": 0.6411214953271028, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9939077840112202, "tokens_p.mean_in_band": 0.5619255514705882, "tokens_rate.above_band": 0.9544846050870147, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04551539491298527 }, { "epoch": 0.42386968085106386, "grad_norm": 195.14534445210265, "learning_rate": 1.963361237755503e-07, "loss": 0.9004, "step": 2550, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.42655935613682094, "success_rate.epoch.env.math": 0.8619528619528619, "success_rate.epoch.env.sat": 0.0893854748603352, "success_rate.epoch.env.science": 0.6876811594202898, "success_rate.epoch.env.webshop": 0.2, "success_rate.epoch.env_macro_mean": 0.48024005077644777, "success_rate.epoch.global": 0.6409419445092096, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9908823529411764, "tokens_p.mean_in_band": 0.6134717987804879, "tokens_rate.above_band": 0.8382642998027613, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16173570019723865 }, { "epoch": 0.42470079787234044, "grad_norm": 88.0994800677281, "learning_rate": 1.9632073804275852e-07, "loss": 0.6849, "step": 2555, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.42655935613682094, "success_rate.epoch.env.math": 0.8626465661641541, "success_rate.epoch.env.sat": 0.08888888888888889, "success_rate.epoch.env.science": 0.6871832005792904, "success_rate.epoch.env.webshop": 0.2, "success_rate.epoch.env_macro_mean": 0.4802127016308883, "success_rate.epoch.global": 0.6407450523864959, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9966397849462365, "tokens_p.mean_in_band": 0.5589297715053764, "tokens_rate.above_band": 0.9285714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07142857142857142 }, { "epoch": 0.425531914893617, "grad_norm": 180.6289842904881, "learning_rate": 1.9630532334465513e-07, "loss": 0.7542, "step": 2560, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.42570281124497994, "success_rate.epoch.env.math": 0.8626465661641541, "success_rate.epoch.env.sat": 0.08888888888888889, "success_rate.epoch.env.science": 0.6878612716763006, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4776515163620584, "success_rate.epoch.global": 0.6410315985130112, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942334254143647, "tokens_p.mean_in_band": 0.5959256329113924, "tokens_rate.above_band": 0.9197154471544715, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08028455284552846 }, { "epoch": 0.4263630319148936, "grad_norm": 200.18281108651172, "learning_rate": 1.962898797085974e-07, "loss": 0.6978, "step": 2565, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.4248496993987976, "success_rate.epoch.env.math": 0.8626465661641541, "success_rate.epoch.env.sat": 0.08888888888888889, "success_rate.epoch.env.science": 0.6886486486486486, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47764554046443713, "success_rate.epoch.global": 0.641465677179963, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9985665137614679, "tokens_p.mean_in_band": 0.5060975609756098, "tokens_rate.above_band": 0.9410071942446043, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058992805755395686 }, { "epoch": 0.4271941489361702, "grad_norm": 84.51137493470681, "learning_rate": 1.9627440716199404e-07, "loss": 0.6582, "step": 2570, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.4248496993987976, "success_rate.epoch.env.math": 0.862876254180602, "success_rate.epoch.env.sat": 0.08888888888888889, "success_rate.epoch.env.science": 0.6892086330935252, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47771732887001206, "success_rate.epoch.global": 0.6419638721630384, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9918981481481481, "tokens_p.mean_in_band": 0.7374131944444444, "tokens_rate.above_band": 0.8823529411764706, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11764705882352941 }, { "epoch": 0.42802526595744683, "grad_norm": 144.80915410834996, "learning_rate": 1.9625890573230504e-07, "loss": 0.6578, "step": 2575, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.426, "success_rate.epoch.env.math": 0.8631051752921536, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.6890726096333573, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4782879617152301, "success_rate.epoch.global": 0.642146657413833, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9919354838709677, "tokens_p.mean_below_band": 1.4915713109076023e-10, "tokens_p.mean_in_band": 0.7178485576923077, "tokens_rate.above_band": 0.9465648854961832, "tokens_rate.below_band": 0.003816793893129771, "tokens_rate.in_band": 0.04961832061068702 }, { "epoch": 0.4288563829787234, "grad_norm": 232.37504892155536, "learning_rate": 1.962433754470416e-07, "loss": 0.7746, "step": 2580, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.42714570858283435, "success_rate.epoch.env.math": 0.8631051752921536, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.6892716182274847, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47841020873131757, "success_rate.epoch.global": 0.6424116424116424, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9893558951965066, "tokens_p.mean_in_band": 0.5439453125, "tokens_rate.above_band": 0.9196787148594378, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08032128514056225 }, { "epoch": 0.4296875, "grad_norm": 171.0714537781909, "learning_rate": 1.9622781633376618e-07, "loss": 0.6078, "step": 2585, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.42714570858283435, "success_rate.epoch.env.math": 0.8635607321131448, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.6891117478510028, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47843708931718204, "success_rate.epoch.global": 0.6425276752767528, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.98575, "tokens_p.mean_in_band": 0.7235753676470589, "tokens_rate.above_band": 0.8802816901408451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11971830985915492 }, { "epoch": 0.4305186170212766, "grad_norm": 155.40530701835368, "learning_rate": 1.9621222842009239e-07, "loss": 0.7472, "step": 2590, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4563106796116505, "success_rate.epoch.env.logic": 0.4262948207171315, "success_rate.epoch.env.math": 0.8642384105960265, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.6891988555078684, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4780225678772006, "success_rate.epoch.global": 0.6425776754890679, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.35, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9979628791308285, "tokens_p.mean_in_band": 0.5255033052884616, "tokens_rate.above_band": 0.9550367488110679, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04496325118893212 }, { "epoch": 0.43134973404255317, "grad_norm": 169.74785476797277, "learning_rate": 1.96196611733685e-07, "loss": 0.7638, "step": 2595, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4074074074074074, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4262948207171315, "success_rate.epoch.env.math": 0.8646864686468647, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.6895074946466809, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47769248555911675, "success_rate.epoch.global": 0.6428571428571429, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.984375, "tokens_p.mean_in_band": 0.6973711993243243, "tokens_rate.above_band": 0.8010752688172043, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1989247311827957 }, { "epoch": 0.4321808510638298, "grad_norm": 85.2886067688039, "learning_rate": 1.9618096630225975e-07, "loss": 0.6345, "step": 2600, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.41284403669724773, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4262948207171315, "success_rate.epoch.env.math": 0.8651315789473685, "success_rate.epoch.env.sat": 0.09340659340659341, "success_rate.epoch.env.science": 0.6892373485388453, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47815571600960766, "success_rate.epoch.global": 0.6428243924805135, "success_rate.window.env.agentgym:alfworld": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.997536496350365, "tokens_p.mean_in_band": 0.5894191576086957, "tokens_rate.above_band": 0.9370725034199726, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06292749658002736 }, { "epoch": 0.4330119680851064, "grad_norm": 940.218283032904, "learning_rate": 1.961652921535835e-07, "loss": 0.8205, "step": 2605, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.41818181818181815, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4262948207171315, "success_rate.epoch.env.math": 0.8637110016420362, "success_rate.epoch.env.sat": 0.09340659340659341, "success_rate.epoch.env.science": 0.6901208244491827, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47859214147229634, "success_rate.epoch.global": 0.6434126258005489, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946395985401459, "tokens_p.mean_in_band": 0.7077414772727273, "tokens_rate.above_band": 0.9614035087719298, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03859649122807018 }, { "epoch": 0.433843085106383, "grad_norm": 118.2766197085642, "learning_rate": 1.9614958931547396e-07, "loss": 0.8494, "step": 2610, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4274353876739563, "success_rate.epoch.env.math": 0.8641571194762684, "success_rate.epoch.env.sat": 0.09340659340659341, "success_rate.epoch.env.science": 0.6896062433487052, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4783471141925851, "success_rate.epoch.global": 0.6432321387811002, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9966322055137845, "tokens_p.mean_in_band": 0.49400111607142855, "tokens_rate.above_band": 0.9661016949152542, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03389830508474576 }, { "epoch": 0.43467420212765956, "grad_norm": 171.50132031955368, "learning_rate": 1.9613385781579986e-07, "loss": 0.7923, "step": 2615, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4274353876739563, "success_rate.epoch.env.math": 0.8643790849673203, "success_rate.epoch.env.sat": 0.09289617486338798, "success_rate.epoch.env.science": 0.6900212314225053, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4783586173763711, "success_rate.epoch.global": 0.643507972665148, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9893973214285714, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.8936170212765957, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10638297872340426 }, { "epoch": 0.43550531914893614, "grad_norm": 290.17082193721075, "learning_rate": 1.9611809768248077e-07, "loss": 0.7404, "step": 2620, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4274353876739563, "success_rate.epoch.env.math": 0.8648208469055375, "success_rate.epoch.env.sat": 0.09289617486338798, "success_rate.epoch.env.science": 0.690081186021885, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4784042279706981, "success_rate.epoch.global": 0.6437826778813367, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9871608527131783, "tokens_p.mean_in_band": 0.64044189453125, "tokens_rate.above_band": 0.8896551724137931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1103448275862069 }, { "epoch": 0.4363364361702128, "grad_norm": 100.94327819444389, "learning_rate": 1.9610230894348705e-07, "loss": 0.7111, "step": 2625, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4274353876739563, "success_rate.epoch.env.math": 0.8648208469055375, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.6902748414376322, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4783759356888968, "success_rate.epoch.global": 0.6438138479001135, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9891902515723271, "tokens_p.mean_below_band": 2.1047890186309814e-07, "tokens_p.mean_in_band": 0.6842041015625, "tokens_rate.above_band": 0.828125, "tokens_rate.below_band": 0.005208333333333333, "tokens_rate.in_band": 0.16666666666666666 }, { "epoch": 0.43716755319148937, "grad_norm": 107.18524915131678, "learning_rate": 1.9608649162683986e-07, "loss": 0.7372, "step": 2630, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.42658730158730157, "success_rate.epoch.env.math": 0.8648208469055375, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.6904929577464789, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47831866570909604, "success_rate.epoch.global": 0.6438294010889292, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9954336086404066, "tokens_p.mean_in_band": 0.6334134615384616, "tokens_rate.above_band": 0.9308101714961561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06918982850384388 }, { "epoch": 0.43799867021276595, "grad_norm": 99.61762381290823, "learning_rate": 1.9607064576061101e-07, "loss": 0.6751, "step": 2635, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.424901185770751, "success_rate.epoch.env.math": 0.8648208469055375, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.6912539515279241, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47823456370590467, "success_rate.epoch.global": 0.6441023318994793, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9995022753128555, "tokens_p.mean_in_band": 0.6292410714285714, "tokens_rate.above_band": 0.9617067833698031, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038293216630196934 }, { "epoch": 0.43882978723404253, "grad_norm": 158.99937759046935, "learning_rate": 1.9605477137292307e-07, "loss": 0.5855, "step": 2640, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4251968503937008, "success_rate.epoch.env.math": 0.8654781199351702, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.6915789473684211, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4783507394780028, "success_rate.epoch.global": 0.6445197740112995, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983057228915663, "tokens_p.mean_in_band": 0.5705818965517241, "tokens_rate.above_band": 0.9581529581529582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04184704184704185 }, { "epoch": 0.4396609042553192, "grad_norm": 94.26411185425887, "learning_rate": 1.960388684919492e-07, "loss": 0.452, "step": 2645, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4251968503937008, "success_rate.epoch.env.math": 0.8654781199351702, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.691768826619965, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4783680012281431, "success_rate.epoch.global": 0.6446952595936795, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9981278801843319, "tokens_p.mean_in_band": 0.5857558139534884, "tokens_rate.above_band": 0.9380403458213257, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06195965417867435 }, { "epoch": 0.44049202127659576, "grad_norm": 120.37472559932553, "learning_rate": 1.960229371459131e-07, "loss": 0.5471, "step": 2650, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4263261296660118, "success_rate.epoch.env.math": 0.8659127625201939, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.6922000699545295, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4785039787495945, "success_rate.epoch.global": 0.6451104100946372, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9899017467248908, "tokens_p.mean_in_band": 0.7038762019230769, "tokens_rate.above_band": 0.8980392156862745, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10196078431372549 }, { "epoch": 0.44132313829787234, "grad_norm": 176.44776534657888, "learning_rate": 1.9600697736308897e-07, "loss": 0.7269, "step": 2655, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.42661448140900193, "success_rate.epoch.env.math": 0.8661290322580645, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.6926301082780301, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47858894782271827, "success_rate.epoch.global": 0.6454443194600675, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978433694181326, "tokens_p.mean_in_band": 0.6870404411764706, "tokens_rate.above_band": 0.9560155239327296, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04398447606727038 }, { "epoch": 0.4421542553191489, "grad_norm": 106.6329495832436, "learning_rate": 1.9599098917180159e-07, "loss": 0.605, "step": 2660, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.42661448140900193, "success_rate.epoch.env.math": 0.8663446054750402, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.6925758103868944, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4786036092159764, "success_rate.epoch.global": 0.6455525606469003, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9872159090909091, "tokens_p.mean_below_band": 1.7229467630386353e-08, "tokens_p.mean_in_band": 0.6243106617647058, "tokens_rate.above_band": 0.859375, "tokens_rate.below_band": 0.0078125, "tokens_rate.in_band": 0.1328125 }, { "epoch": 0.4429853723404255, "grad_norm": 130.92137917278285, "learning_rate": 1.9597497260042604e-07, "loss": 0.6431, "step": 2665, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4249512670565302, "success_rate.epoch.env.math": 0.8665594855305466, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.6928695652173913, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4784986474462975, "success_rate.epoch.global": 0.6455951580363147, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9974032951289399, "tokens_p.mean_in_band": 0.5996492346938775, "tokens_rate.above_band": 0.9344042838018741, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06559571619812583 }, { "epoch": 0.44381648936170215, "grad_norm": 245.77192990999285, "learning_rate": 1.9595892767738783e-07, "loss": 0.6653, "step": 2670, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.42412451361867703, "success_rate.epoch.env.math": 0.8665594855305466, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.6929215822345594, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47842821686259884, "success_rate.epoch.global": 0.6455582904452898, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977756709451575, "tokens_p.mean_in_band": 0.6217105263157895, "tokens_rate.above_band": 0.9575418994413408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042458100558659215 }, { "epoch": 0.44464760638297873, "grad_norm": 163.566667138646, "learning_rate": 1.9594285443116272e-07, "loss": 0.7178, "step": 2675, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.42330097087378643, "success_rate.epoch.env.math": 0.8665594855305466, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.6936656282450675, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4784209898867459, "success_rate.epoch.global": 0.6459682823319187, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9950443097014925, "tokens_p.mean_in_band": 0.6382637593283582, "tokens_rate.above_band": 0.8888888888888888, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1111111111111111 }, { "epoch": 0.4454787234042553, "grad_norm": 142.7759695604499, "learning_rate": 1.9592675289027682e-07, "loss": 1.0158, "step": 2680, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4144144144144144, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.42359767891682787, "success_rate.epoch.env.math": 0.8672, "success_rate.epoch.env.sat": 0.0913978494623656, "success_rate.epoch.env.science": 0.6938775510204082, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4784805446920467, "success_rate.epoch.global": 0.6461538461538462, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975429975429976, "tokens_p.mean_in_band": 0.6613932291666667, "tokens_rate.above_band": 0.931350114416476, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06864988558352403 }, { "epoch": 0.4463098404255319, "grad_norm": 107.37520687521967, "learning_rate": 1.9591062308330643e-07, "loss": 0.6829, "step": 2685, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.42359767891682787, "success_rate.epoch.env.math": 0.8676236044657097, "success_rate.epoch.env.sat": 0.0913978494623656, "success_rate.epoch.env.science": 0.6941948859709745, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47821152748442375, "success_rate.epoch.global": 0.6464039189490092, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9964454976303317, "tokens_p.mean_below_band": 1.0477378964424133e-08, "tokens_p.mean_in_band": 0.7283653846153846, "tokens_rate.above_band": 0.9690011481056258, "tokens_rate.below_band": 0.001148105625717566, "tokens_rate.in_band": 0.029850746268656716 }, { "epoch": 0.4471409574468085, "grad_norm": 99.35581540618682, "learning_rate": 1.95894465038878e-07, "loss": 0.6909, "step": 2690, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4107142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.42359767891682787, "success_rate.epoch.env.math": 0.8680445151033387, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.694271911663216, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4782123618277506, "success_rate.epoch.global": 0.6465095598043575, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9974798387096774, "tokens_p.mean_in_band": 0.5875, "tokens_rate.above_band": 0.9316939890710383, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06830601092896176 }, { "epoch": 0.4479720744680851, "grad_norm": 146.93546651455324, "learning_rate": 1.958782787856681e-07, "loss": 0.7674, "step": 2695, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40707964601769914, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.4247104247104247, "success_rate.epoch.env.math": 0.8680445151033387, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.694032424974129, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4783548730765623, "success_rate.epoch.global": 0.6462358427714857, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.3, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9994692144373672, "tokens_p.mean_in_band": 0.5468394886363637, "tokens_rate.above_band": 0.9448345035105316, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055165496489468405 }, { "epoch": 0.4488031914893617, "grad_norm": 169.7037098961248, "learning_rate": 1.9586206435240332e-07, "loss": 0.7134, "step": 2700, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.425, "success_rate.epoch.env.math": 0.8680445151033387, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.6943487250172296, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4780853277351806, "success_rate.epoch.global": 0.6462630294965624, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9954613590939374, "tokens_p.mean_in_band": 0.5613779920212766, "tokens_rate.above_band": 0.9696382428940569, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030361757105943153 }, { "epoch": 0.4496343085106383, "grad_norm": 93.71172162719347, "learning_rate": 1.9584582176786039e-07, "loss": 0.576, "step": 2705, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.42610364683301344, "success_rate.epoch.env.math": 0.8682539682539683, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.6946354883081155, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47823076985104684, "success_rate.epoch.global": 0.6466681425725038, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9904233870967742, "tokens_p.mean_in_band": 0.6768973214285714, "tokens_rate.above_band": 0.9637305699481865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03626943005181347 }, { "epoch": 0.4504654255319149, "grad_norm": 97.88861330585847, "learning_rate": 1.9582955106086584e-07, "loss": 0.6309, "step": 2710, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.42610364683301344, "success_rate.epoch.env.math": 0.8686708860759493, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.695054945054945, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4787813293772824, "success_rate.epoch.global": 0.6472148541114059, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957495564754583, "tokens_p.mean_below_band": 2.6659108698368073e-08, "tokens_p.mean_in_band": 0.7765299479166666, "tokens_rate.above_band": 0.9917888563049854, "tokens_rate.below_band": 0.0011730205278592375, "tokens_rate.in_band": 0.007038123167155425 }, { "epoch": 0.4512965425531915, "grad_norm": 279.2356921464699, "learning_rate": 1.9581325226029624e-07, "loss": 0.7458, "step": 2715, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.42610364683301344, "success_rate.epoch.env.math": 0.8686708860759493, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.6953392734749828, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47880717741546763, "success_rate.epoch.global": 0.6474613686534216, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9769736842105263, "tokens_p.mean_below_band": 1.9190338207408786e-10, "tokens_p.mean_in_band": 0.49556107954545453, "tokens_rate.above_band": 0.8172043010752689, "tokens_rate.below_band": 0.005376344086021506, "tokens_rate.in_band": 0.1774193548387097 }, { "epoch": 0.4521276595744681, "grad_norm": 87.13322919789367, "learning_rate": 1.9579692539507793e-07, "loss": 0.6673, "step": 2720, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.42610364683301344, "success_rate.epoch.env.math": 0.8686708860759493, "success_rate.epoch.env.sat": 0.09473684210526316, "success_rate.epoch.env.science": 0.6955183031132398, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4787778843705062, "success_rate.epoch.global": 0.6474867724867724, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9942528735632183, "tokens_p.mean_in_band": 0.7026452850877193, "tokens_rate.above_band": 0.7532467532467533, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.24675324675324675 }, { "epoch": 0.4529587765957447, "grad_norm": 163.96541388755767, "learning_rate": 1.9578057049418716e-07, "loss": 0.6901, "step": 2725, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.42610364683301344, "success_rate.epoch.env.math": 0.8690851735015773, "success_rate.epoch.env.sat": 0.09473684210526316, "success_rate.epoch.env.science": 0.6960382513661202, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47886281488673427, "success_rate.epoch.global": 0.6480299361655294, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9923986486486487, "tokens_p.mean_in_band": 0.7625679347826086, "tokens_rate.above_band": 0.9061224489795918, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09387755102040816 }, { "epoch": 0.45378989361702127, "grad_norm": 159.28682834799423, "learning_rate": 1.9576418758664977e-07, "loss": 0.6946, "step": 2730, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.4272030651340996, "success_rate.epoch.env.math": 0.8690851735015773, "success_rate.epoch.env.sat": 0.09473684210526316, "success_rate.epoch.env.science": 0.6964529331514324, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4786083995797148, "success_rate.epoch.global": 0.6482743460101121, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9918681210691824, "tokens_p.mean_in_band": 0.5435842041800643, "tokens_rate.above_band": 0.8035375868603917, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19646241313960833 }, { "epoch": 0.45462101063829785, "grad_norm": 99.10923879794716, "learning_rate": 1.957477767015415e-07, "loss": 0.5883, "step": 2735, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45794392523364486, "success_rate.epoch.env.logic": 0.42638623326959846, "success_rate.epoch.env.math": 0.8690851735015773, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.6965258855585831, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4789159463971499, "success_rate.epoch.global": 0.6480140443274084, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.993332262210797, "tokens_p.mean_in_band": 0.6842830882352942, "tokens_rate.above_band": 0.884090909090909, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1159090909090909 }, { "epoch": 0.4554521276595745, "grad_norm": 176.25272954306467, "learning_rate": 1.957313378679876e-07, "loss": 0.7942, "step": 2740, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45794392523364486, "success_rate.epoch.env.logic": 0.42638623326959846, "success_rate.epoch.env.math": 0.8692913385826772, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.6971447994561523, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.478990953577029, "success_rate.epoch.global": 0.6485539000876425, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.987060546875, "tokens_p.mean_in_band": 0.7219122023809523, "tokens_rate.above_band": 0.8590604026845637, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14093959731543623 }, { "epoch": 0.4562832446808511, "grad_norm": 140.18723055648027, "learning_rate": 1.9571487111516298e-07, "loss": 0.578, "step": 2745, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45794392523364486, "success_rate.epoch.env.logic": 0.42638623326959846, "success_rate.epoch.env.math": 0.8701095461658842, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.6976586359009161, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4791120484886627, "success_rate.epoch.global": 0.6492455718346818, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9842479674796748, "tokens_p.mean_in_band": 0.73984375, "tokens_rate.above_band": 0.924812030075188, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07518796992481203 }, { "epoch": 0.45711436170212766, "grad_norm": 106.34718611030647, "learning_rate": 1.9569837647229212e-07, "loss": 0.6392, "step": 2750, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4537037037037037, "success_rate.epoch.env.logic": 0.42638623326959846, "success_rate.epoch.env.math": 0.8705148205928237, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.698170731707317, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4788099710980626, "success_rate.epoch.global": 0.6496398166339227, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.989195865161076, "tokens_p.mean_below_band": 4.4517219066619873e-07, "tokens_p.mean_in_band": 0.49621433985667035, "tokens_rate.above_band": 0.7683082418984435, "tokens_rate.below_band": 0.00025516713447307985, "tokens_rate.in_band": 0.23143659096708344 }, { "epoch": 0.45794547872340424, "grad_norm": 147.32985372023418, "learning_rate": 1.9568185396864895e-07, "loss": 0.6131, "step": 2755, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4537037037037037, "success_rate.epoch.env.logic": 0.42638623326959846, "success_rate.epoch.env.math": 0.8709175738724728, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.698477157360406, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4788744419101297, "success_rate.epoch.global": 0.6500218054949848, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958029935275081, "tokens_p.mean_in_band": 0.6401599702380952, "tokens_rate.above_band": 0.9363636363636364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06363636363636363 }, { "epoch": 0.4587765957446808, "grad_norm": 93.31939097639189, "learning_rate": 1.9566530363355683e-07, "loss": 0.6203, "step": 2760, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45871559633027525, "success_rate.epoch.env.logic": 0.42638623326959846, "success_rate.epoch.env.math": 0.8711180124223602, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.6986810957051065, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4793668300484169, "success_rate.epoch.global": 0.6503267973856209, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967914908579466, "tokens_p.mean_in_band": 0.2386384587020649, "tokens_rate.above_band": 0.807495741056218, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19250425894378195 }, { "epoch": 0.45960771276595747, "grad_norm": 153.5048217257842, "learning_rate": 1.9564872549638864e-07, "loss": 0.7348, "step": 2765, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45871559633027525, "success_rate.epoch.env.logic": 0.4269449715370019, "success_rate.epoch.env.math": 0.8713178294573644, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.6986486486486486, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47943283988895774, "success_rate.epoch.global": 0.6502827316224445, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9956165158371041, "tokens_p.mean_in_band": 0.619140625, "tokens_rate.above_band": 0.9525862068965517, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04741379310344827 }, { "epoch": 0.46043882978723405, "grad_norm": 81.5352851126888, "learning_rate": 1.9563211958656642e-07, "loss": 0.6267, "step": 2770, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.4269449715370019, "success_rate.epoch.env.math": 0.8713178294573644, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.699055330634278, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4790907071799404, "success_rate.epoch.global": 0.650445361720617, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9939828026434364, "tokens_p.mean_below_band": 3.265837828318278e-07, "tokens_p.mean_in_band": 0.4922722649186257, "tokens_rate.above_band": 0.8568854568854569, "tokens_rate.below_band": 0.0007722007722007722, "tokens_rate.in_band": 0.14234234234234233 }, { "epoch": 0.46126994680851063, "grad_norm": 87.01980633304194, "learning_rate": 1.9561548593356166e-07, "loss": 0.7436, "step": 2775, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.40350877192982454, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.4269449715370019, "success_rate.epoch.env.math": 0.871517027863777, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.6995621421353991, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47915488989880706, "success_rate.epoch.global": 0.6509004122369277, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998675431711146, "tokens_p.mean_in_band": 0.6654146634615384, "tokens_rate.above_band": 0.9607843137254902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0392156862745098 }, { "epoch": 0.4621010638297872, "grad_norm": 121.68498038729926, "learning_rate": 1.9559882456689496e-07, "loss": 0.8066, "step": 2780, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.42613636363636365, "success_rate.epoch.env.math": 0.8719135802469136, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.6998654104979811, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47882602089110304, "success_rate.epoch.global": 0.6509965337954939, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.35, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9970794392523364, "tokens_p.mean_in_band": 0.5094992897727273, "tokens_rate.above_band": 0.9547014413177762, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045298558682223745 }, { "epoch": 0.46293218085106386, "grad_norm": 96.6506725239863, "learning_rate": 1.9558213551613622e-07, "loss": 0.6055, "step": 2785, "success_rate.epoch.env.abd": 0.48148148148148145, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.42613636363636365, "success_rate.epoch.env.math": 0.8719135802469136, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.7002349781805975, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4788596179531591, "success_rate.epoch.global": 0.6513086740211984, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.98515625, "tokens_p.mean_below_band": 3.097966327914037e-12, "tokens_p.mean_in_band": 0.6627987132352942, "tokens_rate.above_band": 0.7741935483870968, "tokens_rate.below_band": 0.0064516129032258064, "tokens_rate.in_band": 0.21935483870967742 }, { "epoch": 0.46376329787234044, "grad_norm": 135.11375940372034, "learning_rate": 1.9556541881090437e-07, "loss": 0.624, "step": 2790, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.42722117202268434, "success_rate.epoch.env.math": 0.8719135802469136, "success_rate.epoch.env.sat": 0.09326424870466321, "success_rate.epoch.env.science": 0.7006369426751592, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.48063412159898306, "success_rate.epoch.global": 0.6516198704103672, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942853009259259, "tokens_p.mean_in_band": 0.6832853618421053, "tokens_rate.above_band": 0.9578713968957872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04212860310421286 }, { "epoch": 0.464594414893617, "grad_norm": 150.10775642762593, "learning_rate": 1.9554867448086747e-07, "loss": 0.6595, "step": 2795, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4051724137931034, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.42641509433962266, "success_rate.epoch.env.math": 0.8721109399075501, "success_rate.epoch.env.sat": 0.09326424870466321, "success_rate.epoch.env.science": 0.7004685408299867, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.48103369377402905, "success_rate.epoch.global": 0.6515739542906425, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973897911832946, "tokens_p.mean_in_band": 0.49883497807017546, "tokens_rate.above_band": 0.9379760609357998, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.062023939064200215 }, { "epoch": 0.4654255319148936, "grad_norm": 172.64008702891334, "learning_rate": 1.9553190255574263e-07, "loss": 0.616, "step": 2800, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4051724137931034, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.4266917293233083, "success_rate.epoch.env.math": 0.8723076923076923, "success_rate.epoch.env.sat": 0.09326424870466321, "success_rate.epoch.env.science": 0.7006348145673238, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.481091844785044, "success_rate.epoch.global": 0.6517434352130865, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9911971830985915, "tokens_p.mean_in_band": 0.7157258064516129, "tokens_rate.above_band": 0.9015873015873016, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09841269841269841 }, { "epoch": 0.4662566489361702, "grad_norm": 105.32749578745384, "learning_rate": 1.955151030652959e-07, "loss": 0.5412, "step": 2805, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4051724137931034, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45045045045045046, "success_rate.epoch.env.logic": 0.4266917293233083, "success_rate.epoch.env.math": 0.8725038402457758, "success_rate.epoch.env.sat": 0.09326424870466321, "success_rate.epoch.env.science": 0.7009003001000333, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4807615383646612, "success_rate.epoch.global": 0.6519123334765793, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9906535388127854, "tokens_p.mean_below_band": 5.364418029785156e-07, "tokens_p.mean_in_band": 0.6409590871710527, "tokens_rate.above_band": 0.8513119533527697, "tokens_rate.below_band": 0.0009718172983479105, "tokens_rate.in_band": 0.1477162293488824 }, { "epoch": 0.46708776595744683, "grad_norm": 120.6877766221014, "learning_rate": 1.954982760393423e-07, "loss": 0.5028, "step": 2810, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4051724137931034, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45045045045045046, "success_rate.epoch.env.logic": 0.42696629213483145, "success_rate.epoch.env.math": 0.8726993865030674, "success_rate.epoch.env.sat": 0.09278350515463918, "success_rate.epoch.env.science": 0.7012987012987013, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4807967898844302, "success_rate.epoch.global": 0.6520806520806521, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0, "tokens_p.mean_in_band": 0.6525548986486487, "tokens_rate.above_band": 0.9529262086513995, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047073791348600506 }, { "epoch": 0.4679188829787234, "grad_norm": 200.7677967798599, "learning_rate": 1.9548142150774558e-07, "loss": 0.8387, "step": 2815, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45045045045045046, "success_rate.epoch.env.logic": 0.42696629213483145, "success_rate.epoch.env.math": 0.8726993865030674, "success_rate.epoch.env.sat": 0.09278350515463918, "success_rate.epoch.env.science": 0.7015619807244932, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4805059050973475, "success_rate.epoch.global": 0.6521739130434783, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.16666666666666669, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9935496794871795, "tokens_p.mean_in_band": 0.5873125700672646, "tokens_rate.above_band": 0.8749298934380259, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1250701065619742 }, { "epoch": 0.46875, "grad_norm": 158.43588397544846, "learning_rate": 1.9546453950041847e-07, "loss": 0.6887, "step": 2820, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45045045045045046, "success_rate.epoch.env.logic": 0.4280373831775701, "success_rate.epoch.env.math": 0.8728943338437979, "success_rate.epoch.env.sat": 0.09278350515463918, "success_rate.epoch.env.science": 0.701527224435591, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4806178398332172, "success_rate.epoch.global": 0.6523320496362859, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9926470588235294, "tokens_p.mean_in_band": 0.6404551630434783, "tokens_rate.above_band": 0.9057377049180327, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0942622950819672 }, { "epoch": 0.4695811170212766, "grad_norm": 120.2227106073086, "learning_rate": 1.954476300473224e-07, "loss": 0.6628, "step": 2825, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.4280373831775701, "success_rate.epoch.env.math": 0.8730886850152905, "success_rate.epoch.env.sat": 0.09278350515463918, "success_rate.epoch.env.science": 0.7016904209479615, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4802847188024886, "success_rate.epoch.global": 0.652424695577868, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9942905211912944, "tokens_p.mean_below_band": 1.4371835277415812e-07, "tokens_p.mean_in_band": 0.5088764289264414, "tokens_rate.above_band": 0.8736552414310733, "tokens_rate.below_band": 0.0005003752814610958, "tokens_rate.in_band": 0.1258443832874656 }, { "epoch": 0.47041223404255317, "grad_norm": 76.35485117265553, "learning_rate": 1.954306931784675e-07, "loss": 0.8529, "step": 2830, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.4280373831775701, "success_rate.epoch.env.math": 0.8732824427480916, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.7018530774321642, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47870646626246244, "success_rate.epoch.global": 0.6523779057368309, "success_rate.window.env.abd": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9876160990712074, "tokens_p.mean_below_band": 2.996789084540473e-07, "tokens_p.mean_in_band": 0.1055054440649018, "tokens_rate.above_band": 0.12079281974569933, "tokens_rate.below_band": 0.0033657442034405387, "tokens_rate.in_band": 0.8758414360508602 }, { "epoch": 0.4712433510638298, "grad_norm": 144.8206309987553, "learning_rate": 1.9541372892391251e-07, "loss": 0.8065, "step": 2835, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.42723880597014924, "success_rate.epoch.env.math": 0.8732824427480916, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.7021487603305785, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4786607485980073, "success_rate.epoch.global": 0.6524611122949073, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9999561095505618, "tokens_p.mean_in_band": 0.46010044642857145, "tokens_rate.above_band": 0.9442970822281167, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05570291777188329 }, { "epoch": 0.4720744680851064, "grad_norm": 165.1060233513649, "learning_rate": 1.9539673731376494e-07, "loss": 0.5386, "step": 2840, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.42723880597014924, "success_rate.epoch.env.math": 0.8736681887366818, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.7027383701748597, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4787494173100865, "success_rate.epoch.global": 0.6530525420123378, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995, "tokens_p.mean_in_band": 0.7567471590909091, "tokens_rate.above_band": 0.9695290858725761, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030470914127423823 }, { "epoch": 0.472905585106383, "grad_norm": 170.13846986722578, "learning_rate": 1.9537971837818063e-07, "loss": 0.848, "step": 2845, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.42830540037243947, "success_rate.epoch.env.math": 0.8736681887366818, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.7030323005932761, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47887310138469613, "success_rate.epoch.global": 0.6533475026567481, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978507565337001, "tokens_p.mean_in_band": 0.5649181547619048, "tokens_rate.above_band": 0.9453836150845254, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054616384915474644 }, { "epoch": 0.47373670212765956, "grad_norm": 242.3020452725528, "learning_rate": 1.9536267214736407e-07, "loss": 0.5517, "step": 2850, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.42830540037243947, "success_rate.epoch.env.math": 0.8738601823708206, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.703227931488801, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.478908339978302, "success_rate.epoch.global": 0.6535683942225998, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9847972972972973, "tokens_p.mean_below_band": 3.510081114654895e-12, "tokens_p.mean_in_band": 0.751171875, "tokens_rate.above_band": 0.8409090909090909, "tokens_rate.below_band": 0.007575757575757576, "tokens_rate.in_band": 0.15151515151515152 }, { "epoch": 0.47456781914893614, "grad_norm": 94.31219636240225, "learning_rate": 1.953455986515681e-07, "loss": 0.4852, "step": 2855, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.42936802973977695, "success_rate.epoch.env.math": 0.8742424242424243, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.7035208950312603, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47906632495842927, "success_rate.epoch.global": 0.6540093338990242, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.991243961352657, "tokens_p.mean_in_band": 0.7763671875, "tokens_rate.above_band": 0.9627906976744186, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037209302325581395 }, { "epoch": 0.4753989361702128, "grad_norm": 90.64235639072643, "learning_rate": 1.9532849792109408e-07, "loss": 0.6207, "step": 2860, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.42962962962962964, "success_rate.epoch.env.math": 0.8744326777609682, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.7036793692509855, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47912180928825854, "success_rate.epoch.global": 0.6541719610334604, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7111111111111111, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.996319018404908, "tokens_p.mean_in_band": 0.62421875, "tokens_rate.above_band": 0.9532163742690059, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04678362573099415 }, { "epoch": 0.47623005319148937, "grad_norm": 139.52553665825337, "learning_rate": 1.9531136998629155e-07, "loss": 0.589, "step": 2865, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.43068391866913125, "success_rate.epoch.env.math": 0.8746223564954683, "success_rate.epoch.env.sat": 0.10152284263959391, "success_rate.epoch.env.science": 0.7038373237126927, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.48008699770349583, "success_rate.epoch.global": 0.6546184738955824, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.991870777027027, "tokens_p.mean_below_band": 1.4435499906539917e-07, "tokens_p.mean_in_band": 0.6512784090909091, "tokens_rate.above_band": 0.961038961038961, "tokens_rate.below_band": 0.003246753246753247, "tokens_rate.in_band": 0.03571428571428571 }, { "epoch": 0.47706117021276595, "grad_norm": 84.48735718121672, "learning_rate": 1.9529421487755838e-07, "loss": 0.5614, "step": 2870, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.43068391866913125, "success_rate.epoch.env.math": 0.8748114630467572, "success_rate.epoch.env.sat": 0.10152284263959391, "success_rate.epoch.env.science": 0.7043222003929273, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.48014826890636164, "success_rate.epoch.global": 0.6550559425796918, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9889380530973452, "tokens_p.mean_in_band": 0.7828776041666666, "tokens_rate.above_band": 0.904, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.096 }, { "epoch": 0.47789228723404253, "grad_norm": 124.15262435835528, "learning_rate": 1.9527703262534075e-07, "loss": 0.4833, "step": 2875, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.43068391866913125, "success_rate.epoch.env.math": 0.8748114630467572, "success_rate.epoch.env.sat": 0.10152284263959391, "success_rate.epoch.env.science": 0.7045380346065948, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.48016789019851314, "success_rate.epoch.global": 0.6552886641382216, "success_rate.window.env.science": 0.7, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9859913793103449, "tokens_p.mean_below_band": 2.514570951461792e-08, "tokens_p.mean_in_band": 0.7063616071428571, "tokens_rate.above_band": 0.8011049723756906, "tokens_rate.below_band": 0.0055248618784530384, "tokens_rate.in_band": 0.19337016574585636 }, { "epoch": 0.4787234042553192, "grad_norm": 213.58935546013439, "learning_rate": 1.9525982326013288e-07, "loss": 0.8068, "step": 2880, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4017094017094017, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.4317343173431734, "success_rate.epoch.env.math": 0.8734939759036144, "success_rate.epoch.env.sat": 0.10606060606060606, "success_rate.epoch.env.science": 0.7049233778937072, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.480591164583879, "success_rate.epoch.global": 0.6555859457184936, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9963003778337531, "tokens_p.mean_in_band": 0.6088709677419355, "tokens_rate.above_band": 0.927570093457944, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07242990654205607 }, { "epoch": 0.47955452127659576, "grad_norm": 444.9925838797831, "learning_rate": 1.9524258681247724e-07, "loss": 0.5465, "step": 2885, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.430939226519337, "success_rate.epoch.env.math": 0.8734939759036144, "success_rate.epoch.env.sat": 0.10552763819095477, "success_rate.epoch.env.science": 0.7053077173559101, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.480966304590812, "success_rate.epoch.global": 0.655672268907563, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9983069329660239, "tokens_p.mean_in_band": 0.5727719907407407, "tokens_rate.above_band": 0.952755905511811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047244094488188976 }, { "epoch": 0.48038563829787234, "grad_norm": 226.97986778456246, "learning_rate": 1.9522532331296423e-07, "loss": 0.5787, "step": 2890, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.43119266055045874, "success_rate.epoch.env.math": 0.8734939759036144, "success_rate.epoch.env.sat": 0.10552763819095477, "success_rate.epoch.env.science": 0.7052700065061809, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.48098591578912037, "success_rate.epoch.global": 0.6556138509968521, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.3125, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9996327130264446, "tokens_p.mean_below_band": 3.655441105365753e-08, "tokens_p.mean_in_band": 0.6320219494047619, "tokens_rate.above_band": 0.9595864661654135, "tokens_rate.below_band": 0.0009398496240601503, "tokens_rate.in_band": 0.039473684210526314 }, { "epoch": 0.4812167553191489, "grad_norm": 162.05922051049942, "learning_rate": 1.9520803279223242e-07, "loss": 0.7941, "step": 2895, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.43040293040293043, "success_rate.epoch.env.math": 0.8738738738738738, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7054238389087366, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4809146759740596, "success_rate.epoch.global": 0.6556346878927524, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9904092071611253, "tokens_p.mean_in_band": 0.6719215029761905, "tokens_rate.above_band": 0.8231578947368421, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17684210526315788 }, { "epoch": 0.4820478723404255, "grad_norm": 116.64160712247568, "learning_rate": 1.9519071528096818e-07, "loss": 0.7528, "step": 2900, "success_rate.epoch.env.abd": 0.4827586206896552, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.43040293040293043, "success_rate.epoch.env.math": 0.8738738738738738, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7050243111831442, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4808783552717331, "success_rate.epoch.global": 0.6554393305439331, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.5238095238095238, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9978248898678415, "tokens_p.mean_in_band": 0.5424528301886793, "tokens_rate.above_band": 0.9553872053872053, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04461279461279461 }, { "epoch": 0.48287898936170215, "grad_norm": 97.81348489544554, "learning_rate": 1.9517337080990587e-07, "loss": 0.6511, "step": 2905, "success_rate.epoch.env.abd": 0.4666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.43040293040293043, "success_rate.epoch.env.math": 0.8729446935724963, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7052154195011338, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.479348352907517, "success_rate.epoch.global": 0.6554534057668199, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 0.4, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.27999999999999997, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9918089207048458, "tokens_p.mean_below_band": 2.7830537874251604e-10, "tokens_p.mean_in_band": 0.19893307062980242, "tokens_rate.above_band": 0.42762951334379906, "tokens_rate.below_band": 0.0003139717425431711, "tokens_rate.in_band": 0.5720565149136577 }, { "epoch": 0.48371010638297873, "grad_norm": 117.5132882873014, "learning_rate": 1.951559994098277e-07, "loss": 0.6454, "step": 2910, "success_rate.epoch.env.abd": 0.4666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.43040293040293043, "success_rate.epoch.env.math": 0.8729446935724963, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7050825509873746, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4793362739517207, "success_rate.epoch.global": 0.6553884711779449, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9863095238095239, "tokens_p.mean_in_band": 0.744735054347826, "tokens_rate.above_band": 0.8203125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1796875 }, { "epoch": 0.4845412234042553, "grad_norm": 155.74678696384868, "learning_rate": 1.9513860111156363e-07, "loss": 0.6006, "step": 2915, "success_rate.epoch.env.abd": 0.4666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.43040293040293043, "success_rate.epoch.env.math": 0.8729446935724963, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7052359405300582, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.479350218455601, "success_rate.epoch.global": 0.6555393281869393, "success_rate.window.env.science": 0.9, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9858774038461539, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.8739495798319328, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12605042016806722 }, { "epoch": 0.4853723404255319, "grad_norm": 90.83445935900029, "learning_rate": 1.9512117594599135e-07, "loss": 0.7775, "step": 2920, "success_rate.epoch.env.abd": 0.4666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.43040293040293043, "success_rate.epoch.env.math": 0.8703427719821163, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7052937378954164, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4787597803149284, "success_rate.epoch.global": 0.6552083333333333, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9920130272952854, "tokens_p.mean_below_band": 4.6798959374427795e-08, "tokens_p.mean_in_band": 0.6425382653061225, "tokens_rate.above_band": 0.8448637316561844, "tokens_rate.below_band": 0.0010482180293501049, "tokens_rate.in_band": 0.1540880503144654 }, { "epoch": 0.4862034574468085, "grad_norm": 64.3381634631119, "learning_rate": 1.951037239440363e-07, "loss": 0.6782, "step": 2925, "success_rate.epoch.env.abd": 0.4666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.43144424131627057, "success_rate.epoch.env.math": 0.8703427719821163, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.705446342249436, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4788683180665066, "success_rate.epoch.global": 0.6554307116104869, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9942620056497176, "tokens_p.mean_below_band": 6.007030606269836e-08, "tokens_p.mean_in_band": 0.48114224137931033, "tokens_rate.above_band": 0.921875, "tokens_rate.below_band": 0.0026041666666666665, "tokens_rate.in_band": 0.07552083333333333 }, { "epoch": 0.4870345744680851, "grad_norm": 145.6710540873504, "learning_rate": 1.9508624513667149e-07, "loss": 0.5386, "step": 2930, "success_rate.epoch.env.abd": 0.4666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.43144424131627057, "success_rate.epoch.env.math": 0.8703427719821163, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7056931489224831, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47889075503678363, "success_rate.epoch.global": 0.6556525353283458, "success_rate.window.env.abd": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.981694560669456, "tokens_p.mean_in_band": 0.6909398320895522, "tokens_rate.above_band": 0.7810457516339869, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21895424836601307 }, { "epoch": 0.4878656914893617, "grad_norm": 81.64995319749673, "learning_rate": 1.9506873955491753e-07, "loss": 0.612, "step": 2935, "success_rate.epoch.env.abd": 0.4666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.4306569343065693, "success_rate.epoch.env.math": 0.8705357142857143, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7061657032755299, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47887968136832393, "success_rate.epoch.global": 0.6559452168499689, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978725416036308, "tokens_p.mean_in_band": 0.6726422991071429, "tokens_rate.above_band": 0.9218967921896792, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07810320781032078 }, { "epoch": 0.4886968085106383, "grad_norm": 119.2716227190001, "learning_rate": 1.9505120722984255e-07, "loss": 0.747, "step": 2940, "success_rate.epoch.env.abd": 0.4666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.42987249544626593, "success_rate.epoch.env.math": 0.8707280832095097, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7063543003851092, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4788430020204213, "success_rate.epoch.global": 0.6560232220609579, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9949581430745814, "tokens_p.mean_in_band": 0.6673677884615384, "tokens_rate.above_band": 0.926657263751763, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07334273624823695 }, { "epoch": 0.4895279255319149, "grad_norm": 193.43265281618676, "learning_rate": 1.950336481925621e-07, "loss": 0.4983, "step": 2945, "success_rate.epoch.env.abd": 0.4666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.42987249544626593, "success_rate.epoch.env.math": 0.870919881305638, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.7068247356616469, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4789032050543, "success_rate.epoch.global": 0.6564506108925243, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9893229166666667, "tokens_p.mean_in_band": 0.8306361607142857, "tokens_rate.above_band": 0.8955223880597015, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1044776119402985 }, { "epoch": 0.4903590425531915, "grad_norm": 101.21918161840637, "learning_rate": 1.9501606247423918e-07, "loss": 0.7732, "step": 2950, "success_rate.epoch.env.abd": 0.45161290322580644, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43859649122807015, "success_rate.epoch.env.logic": 0.42987249544626593, "success_rate.epoch.env.math": 0.8711111111111111, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.70656, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4771751455838311, "success_rate.epoch.global": 0.6561207609594706, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9938687814421464, "tokens_p.mean_below_band": 2.4817381927277893e-07, "tokens_p.mean_in_band": 0.33247052599758165, "tokens_rate.above_band": 0.8115218870492175, "tokens_rate.below_band": 0.0009072352007257881, "tokens_rate.in_band": 0.1875708777500567 }, { "epoch": 0.4911901595744681, "grad_norm": 109.71645350827444, "learning_rate": 1.9499845010608413e-07, "loss": 0.6651, "step": 2955, "success_rate.epoch.env.abd": 0.45161290322580644, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4434782608695652, "success_rate.epoch.env.logic": 0.42987249544626593, "success_rate.epoch.env.math": 0.8711111111111111, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.7068414322250639, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4780493217824645, "success_rate.epoch.global": 0.6564759347242305, "success_rate.window.env.ded": 0.5, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936067635550587, "tokens_p.mean_below_band": 7.82310962677002e-07, "tokens_p.mean_in_band": 0.5332463357300885, "tokens_rate.above_band": 0.8876209377325726, "tokens_rate.below_band": 0.00024807740014884643, "tokens_rate.in_band": 0.11213098486727859 }, { "epoch": 0.4920212765957447, "grad_norm": 68.61644436380628, "learning_rate": 1.9498081111935464e-07, "loss": 0.8812, "step": 2960, "success_rate.epoch.env.abd": 0.45161290322580644, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4482758620689655, "success_rate.epoch.env.logic": 0.42987249544626593, "success_rate.epoch.env.math": 0.871301775147929, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.7064835515809645, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4784702658362935, "success_rate.epoch.global": 0.6564176640528271, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.3333333333333333, "success_rate.window.env_macro_mean": 0.4666666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9935853769300635, "tokens_p.mean_below_band": 5.364418029785156e-07, "tokens_p.mean_in_band": 0.7454769736842105, "tokens_rate.above_band": 0.934634974533107, "tokens_rate.below_band": 0.0008488964346349745, "tokens_rate.in_band": 0.06451612903225806 }, { "epoch": 0.49285239361702127, "grad_norm": 131.15530033218207, "learning_rate": 1.9496314554535549e-07, "loss": 0.6283, "step": 2965, "success_rate.epoch.env.abd": 0.45161290322580644, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4482758620689655, "success_rate.epoch.env.logic": 0.42987249544626593, "success_rate.epoch.env.math": 0.8716814159292036, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.7067645181876197, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.47853032105337795, "success_rate.epoch.global": 0.6567717996289425, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966299019607843, "tokens_p.mean_in_band": 0.6950334821428571, "tokens_rate.above_band": 0.9622641509433962, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03773584905660377 }, { "epoch": 0.49368351063829785, "grad_norm": 121.2542322405226, "learning_rate": 1.9494545341543883e-07, "loss": 0.3931, "step": 2970, "success_rate.epoch.env.abd": 0.45161290322580644, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4482758620689655, "success_rate.epoch.env.logic": 0.42987249544626593, "success_rate.epoch.env.math": 0.8718703976435935, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.7064073956008926, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4785150355195291, "success_rate.epoch.global": 0.6566426364572605, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9964895546659995, "tokens_p.mean_in_band": 0.7262931034482759, "tokens_rate.above_band": 0.9856966707768188, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014303329223181258 }, { "epoch": 0.4945146276595745, "grad_norm": 109.7852704933942, "learning_rate": 1.949277347610038e-07, "loss": 0.5924, "step": 2975, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4482758620689655, "success_rate.epoch.env.logic": 0.4301270417422868, "success_rate.epoch.env.math": 0.8724340175953079, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.7065944568333864, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4801643377244766, "success_rate.epoch.global": 0.6570018507094386, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971774193548387, "tokens_p.mean_in_band": 0.6062127976190477, "tokens_rate.above_band": 0.9485924112607099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.051407588739290085 }, { "epoch": 0.4953457446808511, "grad_norm": 88.73715716732671, "learning_rate": 1.9490998961349673e-07, "loss": 0.6279, "step": 2980, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.452991452991453, "success_rate.epoch.env.logic": 0.4301270417422868, "success_rate.epoch.env.math": 0.8724340175953079, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.7068362480127186, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4806150088246421, "success_rate.epoch.global": 0.6572895277207392, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970418689320388, "tokens_p.mean_in_band": 0.657421875, "tokens_rate.above_band": 0.9321266968325792, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06787330316742081 }, { "epoch": 0.49617686170212766, "grad_norm": 180.1899567193848, "learning_rate": 1.9489221800441086e-07, "loss": 0.7333, "step": 2985, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4491525423728814, "success_rate.epoch.env.logic": 0.4311594202898551, "success_rate.epoch.env.math": 0.8724340175953079, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7070225611693677, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.48032754844794634, "success_rate.epoch.global": 0.6572307692307693, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9870590490797546, "tokens_p.mean_in_band": 0.6593611725663717, "tokens_rate.above_band": 0.8522875816993464, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1477124183006536 }, { "epoch": 0.49700797872340424, "grad_norm": 68.7296333699883, "learning_rate": 1.948744199652865e-07, "loss": 0.7045, "step": 2990, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.453781512605042, "success_rate.epoch.env.logic": 0.43037974683544306, "success_rate.epoch.env.math": 0.8726207906295754, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7068527918781726, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.48067903031347553, "success_rate.epoch.global": 0.6571779643661684, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.52, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9972319347319347, "tokens_p.mean_below_band": 4.917383193969727e-07, "tokens_p.mean_in_band": 0.1843357669213974, "tokens_rate.above_band": 0.7366914710933028, "tokens_rate.below_band": 0.0011448196908986834, "tokens_rate.in_band": 0.2621637092157985 }, { "epoch": 0.4978390957446808, "grad_norm": 198.43959583424723, "learning_rate": 1.9485659552771073e-07, "loss": 0.6389, "step": 2995, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4067796610169492, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.43243243243243246, "success_rate.epoch.env.math": 0.8726207906295754, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7071315372424722, "success_rate.epoch.env.webshop": 0.16666666666666666, "success_rate.epoch.env_macro_mean": 0.4752441744064677, "success_rate.epoch.global": 0.6574642126789366, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964516932270916, "tokens_p.mean_in_band": 0.49622691761363635, "tokens_rate.above_band": 0.9716129032258064, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02838709677419355 }, { "epoch": 0.49867021276595747, "grad_norm": 81.84898694035189, "learning_rate": 1.948387447233176e-07, "loss": 0.7499, "step": 3000, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.40336134453781514, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.43243243243243246, "success_rate.epoch.env.math": 0.8726207906295754, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.707409753008233, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4857324481775213, "success_rate.epoch.global": 0.6574754901960784, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9977814401622718, "tokens_p.mean_below_band": 3.0547380447387695e-07, "tokens_p.mean_in_band": 0.6332839439655172, "tokens_rate.above_band": 0.9426386233269598, "tokens_rate.below_band": 0.0019120458891013384, "tokens_rate.in_band": 0.055449330783938815 }, { "epoch": 0.49950132978723405, "grad_norm": 162.8283417155497, "learning_rate": 1.9482086758378793e-07, "loss": 0.5524, "step": 3005, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.40336134453781514, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.43243243243243246, "success_rate.epoch.env.math": 0.8726207906295754, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.7069238065128043, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48568827122339137, "success_rate.epoch.global": 0.6572128137114874, "success_rate.window.env.abd": 0.0, "success_rate.window.env.science": 0.5555555555555556, "success_rate.window.env_macro_mean": 0.2777777777777778, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9899929775280899, "tokens_p.mean_below_band": 7.566995918750763e-10, "tokens_p.mean_in_band": 0.48077713815789475, "tokens_rate.above_band": 0.8202764976958525, "tokens_rate.below_band": 0.004608294930875576, "tokens_rate.in_band": 0.17511520737327188 }, { "epoch": 0.5003324468085106, "grad_norm": 85.24720237975545, "learning_rate": 1.9480296414084925e-07, "loss": 0.6139, "step": 3010, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.40336134453781514, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.4326750448833034, "success_rate.epoch.env.math": 0.8715328467153285, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.7070707070707071, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.485624777504712, "success_rate.epoch.global": 0.6572301425661915, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9974302496328928, "tokens_p.mean_in_band": 0.6077724358974359, "tokens_rate.above_band": 0.9458333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05416666666666667 }, { "epoch": 0.5011635638297872, "grad_norm": 129.16856847399177, "learning_rate": 1.9478503442627576e-07, "loss": 0.8013, "step": 3015, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.40336134453781514, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.43189964157706096, "success_rate.epoch.env.math": 0.8717201166180758, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.706717123935667, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48553916691029975, "success_rate.epoch.global": 0.6569684638860631, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.65, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9954268292682927, "tokens_p.mean_in_band": 0.5942925347222222, "tokens_rate.above_band": 0.9458375125376128, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05416248746238716 }, { "epoch": 0.5019946808510638, "grad_norm": 142.34257098649343, "learning_rate": 1.9476707847188836e-07, "loss": 0.5914, "step": 3020, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.40336134453781514, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.43189964157706096, "success_rate.epoch.env.math": 0.872093023255814, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.7072710103871577, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4856234208275023, "success_rate.epoch.global": 0.6575258988421694, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9832677165354331, "tokens_p.mean_below_band": 1.1874362826347351e-08, "tokens_p.mean_in_band": 0.7096354166666666, "tokens_rate.above_band": 0.8698630136986302, "tokens_rate.below_band": 0.00684931506849315, "tokens_rate.in_band": 0.1232876712328767 }, { "epoch": 0.5028257978723404, "grad_norm": 71.54630062438862, "learning_rate": 1.9474909630955443e-07, "loss": 0.7388, "step": 3025, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.40336134453781514, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.43112701252236135, "success_rate.epoch.env.math": 0.872093023255814, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.707639107198994, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48599360735288194, "success_rate.epoch.global": 0.6577399066747819, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9966107871720117, "tokens_p.mean_in_band": 0.6602376302083334, "tokens_rate.above_band": 0.9469906129210381, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0530093870789619 }, { "epoch": 0.503656914893617, "grad_norm": 254.72011399752955, "learning_rate": 1.9473108797118788e-07, "loss": 0.6623, "step": 3030, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.43137254901960786, "success_rate.epoch.env.math": 0.872278664731495, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.7080062794348508, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4857123126882836, "success_rate.epoch.global": 0.6577561765897124, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9990269461077844, "tokens_p.mean_in_band": 0.5681537828947368, "tokens_rate.above_band": 0.9564719358533792, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043528064146620846 }, { "epoch": 0.5044880319148937, "grad_norm": 138.31030230057533, "learning_rate": 1.947130534887492e-07, "loss": 0.6561, "step": 3035, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.4306049822064057, "success_rate.epoch.env.math": 0.8726483357452967, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.7083725305738476, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4857094359009741, "success_rate.epoch.global": 0.6580384226491406, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952088100686499, "tokens_p.mean_in_band": 0.6116727941176471, "tokens_rate.above_band": 0.9625550660792952, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037444933920704845 }, { "epoch": 0.5053191489361702, "grad_norm": 132.76839341716322, "learning_rate": 1.9469499289424506e-07, "loss": 0.7476, "step": 3040, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4049586776859504, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.4306049822064057, "success_rate.epoch.env.math": 0.8726483357452967, "success_rate.epoch.env.sat": 0.1073170731707317, "success_rate.epoch.env.science": 0.7084246789852803, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4861171415385389, "success_rate.epoch.global": 0.6580488790143405, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5599999999999999, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9920753588516746, "tokens_p.mean_in_band": 0.6683693910256411, "tokens_rate.above_band": 0.9414414414414415, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05855855855855856 }, { "epoch": 0.5061502659574468, "grad_norm": 110.40179907659918, "learning_rate": 1.9467690621972868e-07, "loss": 0.4889, "step": 3045, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4049586776859504, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.4306049822064057, "success_rate.epoch.env.math": 0.8726483357452967, "success_rate.epoch.env.sat": 0.1073170731707317, "success_rate.epoch.env.science": 0.7092442223610244, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4861916454817884, "success_rate.epoch.global": 0.6586693548387097, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9875, "tokens_p.mean_in_band": 0.7915296052631579, "tokens_rate.above_band": 0.8680555555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13194444444444445 }, { "epoch": 0.5069813829787234, "grad_norm": 91.20474456042156, "learning_rate": 1.946587934972994e-07, "loss": 0.5502, "step": 3050, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4049586776859504, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.4308510638297872, "success_rate.epoch.env.math": 0.8726483357452967, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7090739008419084, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4861511730670964, "success_rate.epoch.global": 0.65841384863124, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9927805280528053, "tokens_p.mean_in_band": 0.6816761363636363, "tokens_rate.above_band": 0.8463687150837989, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15363128491620112 }, { "epoch": 0.5078125, "grad_norm": 75.37530930954513, "learning_rate": 1.9464065475910295e-07, "loss": 0.5729, "step": 3055, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4049586776859504, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2727272727272727, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.43209876543209874, "success_rate.epoch.env.math": 0.8726483357452967, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7090342679127726, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4862609974919305, "success_rate.epoch.global": 0.6584238037796541, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9980395232120451, "tokens_p.mean_in_band": 0.6196933962264151, "tokens_rate.above_band": 0.9376470588235294, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06235294117647059 }, { "epoch": 0.5086436170212766, "grad_norm": 143.9155011548668, "learning_rate": 1.9462249003733106e-07, "loss": 0.6431, "step": 3060, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4049586776859504, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.43133802816901406, "success_rate.epoch.env.math": 0.8728323699421965, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7093963907902925, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4841753744995726, "success_rate.epoch.global": 0.6585023087733387, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9960570141065831, "tokens_p.mean_in_band": 0.6301339285714286, "tokens_rate.above_band": 0.9239681390296887, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07603186097031137 }, { "epoch": 0.5094747340425532, "grad_norm": 67.32241724482296, "learning_rate": 1.9460429936422172e-07, "loss": 0.7247, "step": 3065, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4016393442622951, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43133802816901406, "success_rate.epoch.env.math": 0.8728323699421965, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7092264678471575, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48351330356506905, "success_rate.epoch.global": 0.6581796311146753, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.19047619047619047, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9932995495495496, "tokens_p.mean_below_band": 9.424984455108643e-07, "tokens_p.mean_in_band": 0.4953420538651316, "tokens_rate.above_band": 0.784452296819788, "tokens_rate.below_band": 0.0007067137809187279, "tokens_rate.in_band": 0.21484098939929328 }, { "epoch": 0.5103058510638298, "grad_norm": 1513.0141409476253, "learning_rate": 1.945860827720589e-07, "loss": 0.706, "step": 3070, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4016393442622951, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43133802816901406, "success_rate.epoch.env.math": 0.8728323699421965, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7094573643410853, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4835342941554261, "success_rate.epoch.global": 0.6583900680816981, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9825858778625954, "tokens_p.mean_in_band": 0.6908967391304348, "tokens_rate.above_band": 0.8506493506493507, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14935064935064934 }, { "epoch": 0.5111369680851063, "grad_norm": 107.26116424799214, "learning_rate": 1.9456784029317258e-07, "loss": 0.852, "step": 3075, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4016393442622951, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4305799648506151, "success_rate.epoch.env.math": 0.873015873015873, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7097772277227723, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4835111398951501, "success_rate.epoch.global": 0.6586048370977413, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975043402777778, "tokens_p.mean_in_band": 0.5502604166666667, "tokens_rate.above_band": 0.9624060150375939, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03759398496240601 }, { "epoch": 0.511968085106383, "grad_norm": 68.83960272206484, "learning_rate": 1.9454957195993872e-07, "loss": 0.5301, "step": 3080, "success_rate.epoch.env.abd": 0.46875, "success_rate.epoch.env.agentgym:alfworld": 0.4016393442622951, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4305799648506151, "success_rate.epoch.env.math": 0.873015873015873, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7100061766522545, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.483531953434194, "success_rate.epoch.global": 0.6588141345577959, "success_rate.window.env.science": 0.7, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.984375, "tokens_p.mean_below_band": 2.066371962428093e-09, "tokens_p.mean_in_band": 0.5927734375, "tokens_rate.above_band": 0.8943089430894309, "tokens_rate.below_band": 0.008130081300813009, "tokens_rate.in_band": 0.0975609756097561 }, { "epoch": 0.5127992021276596, "grad_norm": 76.3543828841282, "learning_rate": 1.945312778047791e-07, "loss": 0.5208, "step": 3085, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4065040650406504, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4305799648506151, "success_rate.epoch.env.math": 0.8731988472622478, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7102746066029003, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4827239152088149, "success_rate.epoch.global": 0.6590229312063809, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9876771907216495, "tokens_p.mean_in_band": 0.6369310461956522, "tokens_rate.above_band": 0.8083333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19166666666666668 }, { "epoch": 0.5136303191489362, "grad_norm": 195.15299431435912, "learning_rate": 1.9451295786016137e-07, "loss": 0.6438, "step": 3090, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4065040650406504, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4305799648506151, "success_rate.epoch.env.math": 0.8735632183908046, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7105020018478596, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4827777121518619, "success_rate.epoch.global": 0.6593669122038622, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99625, "tokens_p.mean_in_band": 0.6681189903846154, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 0.5144614361702128, "grad_norm": 83.95661236784456, "learning_rate": 1.9449461215859897e-07, "loss": 0.6049, "step": 3095, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4065040650406504, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43157894736842106, "success_rate.epoch.env.math": 0.8737446197991392, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7106802092951677, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48290122045853906, "success_rate.epoch.global": 0.659637955042769, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983148060548723, "tokens_p.mean_in_band": 0.6614583333333334, "tokens_rate.above_band": 0.9832558139534884, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01674418604651163 }, { "epoch": 0.5152925531914894, "grad_norm": 226.40219895590317, "learning_rate": 1.94476240732651e-07, "loss": 0.6722, "step": 3100, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4065040650406504, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43157894736842106, "success_rate.epoch.env.math": 0.8737446197991392, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7111247695144438, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4817262728459987, "success_rate.epoch.global": 0.6598450228491953, "success_rate.window.env.abd": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9881365740740741, "tokens_p.mean_in_band": 0.627858231707317, "tokens_rate.above_band": 0.9133192389006343, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08668076109936575 }, { "epoch": 0.516123670212766, "grad_norm": 113.09732536535843, "learning_rate": 1.9445784361492222e-07, "loss": 0.7517, "step": 3105, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43157894736842106, "success_rate.epoch.env.math": 0.8741058655221745, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.710995085995086, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4821824382772416, "success_rate.epoch.global": 0.6599841206828106, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.52, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.991117125984252, "tokens_p.mean_in_band": 0.47296091573816157, "tokens_rate.above_band": 0.7796193984039288, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2203806015960712 }, { "epoch": 0.5169547872340425, "grad_norm": 348.2089233710818, "learning_rate": 1.9443942083806308e-07, "loss": 0.6984, "step": 3110, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4308231173380035, "success_rate.epoch.env.math": 0.8742857142857143, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7111724984653162, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48214620475027364, "success_rate.epoch.global": 0.660055533518445, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9999247592295345, "tokens_p.mean_in_band": 0.5881453804347826, "tokens_rate.above_band": 0.9643962848297214, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03560371517027864 }, { "epoch": 0.5177859042553191, "grad_norm": 145.8757977490526, "learning_rate": 1.9442097243476943e-07, "loss": 0.6476, "step": 3115, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43006993006993005, "success_rate.epoch.env.math": 0.8744650499286734, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7108249003373198, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4820624365909, "success_rate.epoch.global": 0.6597978997424212, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.5238095238095238, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9943340306834031, "tokens_p.mean_in_band": 0.6067266949152542, "tokens_rate.above_band": 0.9239690721649485, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07603092783505154 }, { "epoch": 0.5186170212765957, "grad_norm": 88.38912368014553, "learning_rate": 1.9440249843778268e-07, "loss": 0.5705, "step": 3120, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43006993006993005, "success_rate.epoch.env.math": 0.8744650499286734, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7108323133414932, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48206311050037026, "success_rate.epoch.global": 0.6598733676296004, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9937580541237113, "tokens_p.mean_in_band": 0.4886623475609756, "tokens_rate.above_band": 0.9498164014687882, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05018359853121175 }, { "epoch": 0.5194481382978723, "grad_norm": 91.8427462985612, "learning_rate": 1.9438399887988967e-07, "loss": 0.6048, "step": 3125, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43006993006993005, "success_rate.epoch.env.math": 0.8746438746438746, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7105745721271394, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4820559362731746, "success_rate.epoch.global": 0.6598141925281676, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9938186813186813, "tokens_p.mean_in_band": 0.5875355113636364, "tokens_rate.above_band": 0.8921568627450981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10784313725490197 }, { "epoch": 0.520279255319149, "grad_norm": 113.82402542440803, "learning_rate": 1.9436547379392255e-07, "loss": 0.5084, "step": 3130, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43031358885017423, "success_rate.epoch.env.math": 0.8746438746438746, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.710927960927961, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48211021332599874, "success_rate.epoch.global": 0.6600197433366239, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982169669669669, "tokens_p.mean_in_band": 0.5188078703703703, "tokens_rate.above_band": 0.961038961038961, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03896103896103896 }, { "epoch": 0.5211103723404256, "grad_norm": 136.74885881233408, "learning_rate": 1.9434692321275885e-07, "loss": 0.6728, "step": 3135, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43130434782608695, "success_rate.epoch.env.math": 0.875, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7108470444850701, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48222530131592123, "success_rate.epoch.global": 0.6602286164761529, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9907142857142858, "tokens_p.mean_in_band": 0.690047554347826, "tokens_rate.above_band": 0.8838383838383839, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11616161616161616 }, { "epoch": 0.5219414893617021, "grad_norm": 189.61915445833725, "learning_rate": 1.9432834716932128e-07, "loss": 0.5548, "step": 3140, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43130434782608695, "success_rate.epoch.env.math": 0.8753541076487252, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7112868877395802, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4822974786707608, "success_rate.epoch.global": 0.6606967132454241, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9886363636363636, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.934640522875817, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06535947712418301 }, { "epoch": 0.5227726063829787, "grad_norm": 105.52806631381361, "learning_rate": 1.9430974569657777e-07, "loss": 0.4033, "step": 3145, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43130434782608695, "success_rate.epoch.env.math": 0.8755304101838756, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7112462006079028, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4823098073438038, "success_rate.epoch.global": 0.6607669616519174, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957975679542204, "tokens_p.mean_in_band": 0.6809303977272727, "tokens_rate.above_band": 0.9694868238557559, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030513176144244106 }, { "epoch": 0.5236037234042553, "grad_norm": 220.24522055233118, "learning_rate": 1.9429111882754138e-07, "loss": 0.7186, "step": 3150, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.4112903225806452, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43327556325823224, "success_rate.epoch.env.math": 0.8755304101838756, "success_rate.epoch.env.sat": 0.10628019323671498, "success_rate.epoch.env.science": 0.7115092620710598, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4824660213099095, "success_rate.epoch.global": 0.6609703398153605, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9936507936507937, "tokens_p.mean_in_band": 0.60595703125, "tokens_rate.above_band": 0.8873239436619719, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11267605633802817 }, { "epoch": 0.5244348404255319, "grad_norm": 138.71029122122607, "learning_rate": 1.9427246659527022e-07, "loss": 0.7992, "step": 3155, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.416, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4342560553633218, "success_rate.epoch.env.math": 0.8757062146892656, "success_rate.epoch.env.sat": 0.10628019323671498, "success_rate.epoch.env.science": 0.7115559599636033, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48300353693921655, "success_rate.epoch.global": 0.6612397018438604, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6599999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9966492663656885, "tokens_p.mean_in_band": 0.5879480698529411, "tokens_rate.above_band": 0.9630434782608696, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03695652173913044 }, { "epoch": 0.5252659574468085, "grad_norm": 120.43567329335224, "learning_rate": 1.9425378903286744e-07, "loss": 0.582, "step": 3160, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.416, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4342560553633218, "success_rate.epoch.env.math": 0.8758815232722144, "success_rate.epoch.env.sat": 0.10628019323671498, "success_rate.epoch.env.science": 0.711864406779661, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4830475147027626, "success_rate.epoch.global": 0.6615746180963572, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9911114221724524, "tokens_p.mean_in_band": 0.6971344449626866, "tokens_rate.above_band": 0.8695228821811101, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13047711781888996 }, { "epoch": 0.5260970744680851, "grad_norm": 96.6958608439252, "learning_rate": 1.9423508617348117e-07, "loss": 0.6411, "step": 3165, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.416, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43523316062176165, "success_rate.epoch.env.math": 0.8760563380281691, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7118234048986998, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48305604950198583, "success_rate.epoch.global": 0.6614512028163505, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9928977272727273, "tokens_p.mean_in_band": 0.2745674070247934, "tokens_rate.above_band": 0.5217391304347826, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.4782608695652174 }, { "epoch": 0.5269281914893617, "grad_norm": 119.54343371979893, "learning_rate": 1.9421635805030436e-07, "loss": 0.6658, "step": 3170, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.416, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43448275862068964, "success_rate.epoch.env.math": 0.8762306610407876, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7121715493808517, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48303532818323114, "success_rate.epoch.global": 0.6616526665364328, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978797935103245, "tokens_p.mean_in_band": 0.636953125, "tokens_rate.above_band": 0.9313186813186813, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06868131868131869 }, { "epoch": 0.5277593085106383, "grad_norm": 62.26544055916979, "learning_rate": 1.9419760469657488e-07, "loss": 0.5262, "step": 3175, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.416, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4354561101549053, "success_rate.epoch.env.math": 0.876577840112202, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7125188536953243, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4831869495396041, "success_rate.epoch.global": 0.6621147093250097, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9949292452830188, "tokens_p.mean_below_band": 9.049472282640636e-11, "tokens_p.mean_in_band": 0.78515625, "tokens_rate.above_band": 0.9706959706959707, "tokens_rate.below_band": 0.003663003663003663, "tokens_rate.in_band": 0.02564102564102564 }, { "epoch": 0.5285904255319149, "grad_norm": 230.32432476908042, "learning_rate": 1.9417882614557533e-07, "loss": 0.7394, "step": 3180, "success_rate.epoch.env.abd": 0.4411764705882353, "success_rate.epoch.env.agentgym:alfworld": 0.416, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4339622641509434, "success_rate.epoch.env.math": 0.876577840112202, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7126921917395237, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4830669033614438, "success_rate.epoch.global": 0.6619883040935672, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.25, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9970193740685543, "tokens_p.mean_in_band": 0.5698962602459017, "tokens_rate.above_band": 0.9428571428571428, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05714285714285714 }, { "epoch": 0.5294215425531915, "grad_norm": 131.47905877378923, "learning_rate": 1.9416002243063302e-07, "loss": 0.6955, "step": 3185, "success_rate.epoch.env.abd": 0.45714285714285713, "success_rate.epoch.env.agentgym:alfworld": 0.416, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4324786324786325, "success_rate.epoch.env.math": 0.876750700280112, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.713038241493526, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48443069107091874, "success_rate.epoch.global": 0.662125340599455, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965521015761821, "tokens_p.mean_in_band": 0.5602430555555555, "tokens_rate.above_band": 0.962089300758214, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037910699241786014 }, { "epoch": 0.5302526595744681, "grad_norm": 153.3635923514554, "learning_rate": 1.941411935851199e-07, "loss": 0.7419, "step": 3190, "success_rate.epoch.env.abd": 0.45714285714285713, "success_rate.epoch.env.agentgym:alfworld": 0.416, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43270868824531517, "success_rate.epoch.env.math": 0.876750700280112, "success_rate.epoch.env.sat": 0.10476190476190476, "success_rate.epoch.env.science": 0.7133834586466166, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4844374201424589, "success_rate.epoch.global": 0.6621963070942662, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950487012987013, "tokens_p.mean_in_band": 0.5748697916666666, "tokens_rate.above_band": 0.9553349875930521, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04466501240694789 }, { "epoch": 0.5310837765957447, "grad_norm": 178.19935137659635, "learning_rate": 1.9412233964245266e-07, "loss": 0.5919, "step": 3195, "success_rate.epoch.env.abd": 0.45714285714285713, "success_rate.epoch.env.agentgym:alfworld": 0.416, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.4336734693877551, "success_rate.epoch.env.math": 0.876750700280112, "success_rate.epoch.env.sat": 0.10426540284360189, "success_rate.epoch.env.science": 0.7138138138138138, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4845191141780347, "success_rate.epoch.global": 0.6624611801242236, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988259668508287, "tokens_p.mean_in_band": 0.48353794642857145, "tokens_rate.above_band": 0.969989281886388, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030010718113612004 }, { "epoch": 0.5319148936170213, "grad_norm": 88.08463558337269, "learning_rate": 1.9410346063609236e-07, "loss": 0.5676, "step": 3200, "success_rate.epoch.env.abd": 0.45714285714285713, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43463497453310695, "success_rate.epoch.env.math": 0.8772663877266388, "success_rate.epoch.env.sat": 0.10426540284360189, "success_rate.epoch.env.science": 0.713985594237695, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4850903772371875, "success_rate.epoch.global": 0.662919170381857, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982893061744113, "tokens_p.mean_in_band": 0.6092006138392857, "tokens_rate.above_band": 0.9655808236017209, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034419176398279044 }, { "epoch": 0.5327460106382979, "grad_norm": 232.53031082896106, "learning_rate": 1.940845565995446e-07, "loss": 0.6129, "step": 3205, "success_rate.epoch.env.abd": 0.45714285714285713, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.43485617597292725, "success_rate.epoch.env.math": 0.8776077885952712, "success_rate.epoch.env.sat": 0.10426540284360189, "success_rate.epoch.env.science": 0.7142428785607197, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48516491238550363, "success_rate.epoch.global": 0.6631823461091754, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992517605633803, "tokens_p.mean_in_band": 0.6482514880952381, "tokens_rate.above_band": 0.9712722298221614, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028727770177838577 }, { "epoch": 0.5335771276595744, "grad_norm": 377.2234128007386, "learning_rate": 1.9406562756635946e-07, "loss": 0.79, "step": 3210, "success_rate.epoch.env.abd": 0.45714285714285713, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.433389544688027, "success_rate.epoch.env.math": 0.8776077885952712, "success_rate.epoch.env.sat": 0.10377358490566038, "success_rate.epoch.env.science": 0.7144567494762047, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4850063143575621, "success_rate.epoch.global": 0.6629951690821256, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.2777777777777778, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9983662868632708, "tokens_p.mean_in_band": 0.5758770636792453, "tokens_rate.above_band": 0.9336670838548186, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06633291614518148 }, { "epoch": 0.534408244680851, "grad_norm": 108.34155735487911, "learning_rate": 1.9404667357013128e-07, "loss": 0.6534, "step": 3215, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4634146341463415, "success_rate.epoch.env.logic": 0.43434343434343436, "success_rate.epoch.env.math": 0.8776077885952712, "success_rate.epoch.env.sat": 0.10377358490566038, "success_rate.epoch.env.science": 0.7147129186602871, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4843617586161798, "success_rate.epoch.global": 0.6631924338930708, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955275229357798, "tokens_p.mean_in_band": 0.2726796468098958, "tokens_rate.above_band": 0.5866523143164694, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.4133476856835307 }, { "epoch": 0.5352393617021277, "grad_norm": 196.82178498878054, "learning_rate": 1.9402769464449882e-07, "loss": 0.6372, "step": 3220, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4634146341463415, "success_rate.epoch.env.logic": 0.4336134453781513, "success_rate.epoch.env.math": 0.8779472954230236, "success_rate.epoch.env.sat": 0.10377358490566038, "success_rate.epoch.env.science": 0.7148834429169157, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4843417624451887, "success_rate.epoch.global": 0.6633243347473968, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.993702440304382, "tokens_p.mean_in_band": 0.5337826236263736, "tokens_rate.above_band": 0.8567895683453237, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14321043165467626 }, { "epoch": 0.5360704787234043, "grad_norm": 171.26493342908842, "learning_rate": 1.94008690823145e-07, "loss": 0.5479, "step": 3225, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4634146341463415, "success_rate.epoch.env.logic": 0.4336134453781513, "success_rate.epoch.env.math": 0.8767313019390581, "success_rate.epoch.env.sat": 0.10377358490566038, "success_rate.epoch.env.science": 0.7154786758127051, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48428532966444543, "success_rate.epoch.global": 0.6636503658067, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9938650306748467, "tokens_p.mean_in_band": 0.6644965277777778, "tokens_rate.above_band": 0.9602356406480118, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039764359351988215 }, { "epoch": 0.5369015957446809, "grad_norm": 114.42520006426773, "learning_rate": 1.939896621397969e-07, "loss": 0.5981, "step": 3230, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4634146341463415, "success_rate.epoch.env.logic": 0.43288590604026844, "success_rate.epoch.env.math": 0.8770718232044199, "success_rate.epoch.env.sat": 0.10328638497652583, "success_rate.epoch.env.science": 0.7155198093535895, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48420959471346625, "success_rate.epoch.global": 0.6635909265667052, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9956124641833811, "tokens_p.mean_in_band": 0.5901884191176471, "tokens_rate.above_band": 0.953551912568306, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04644808743169399 }, { "epoch": 0.5377327127659575, "grad_norm": 106.87530993936028, "learning_rate": 1.9397060862822585e-07, "loss": 0.5171, "step": 3235, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4634146341463415, "success_rate.epoch.env.logic": 0.43288590604026844, "success_rate.epoch.env.math": 0.8758620689655172, "success_rate.epoch.env.sat": 0.10328638497652583, "success_rate.epoch.env.science": 0.7158583754834871, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4841303957944658, "success_rate.epoch.global": 0.6637219128096793, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9959766313932981, "tokens_p.mean_in_band": 0.5585186298076923, "tokens_rate.above_band": 0.9561551433389545, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04384485666104553 }, { "epoch": 0.538563829787234, "grad_norm": 120.9381389992513, "learning_rate": 1.9395153032224712e-07, "loss": 0.6498, "step": 3240, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4634146341463415, "success_rate.epoch.env.logic": 0.4331103678929766, "success_rate.epoch.env.math": 0.8760330578512396, "success_rate.epoch.env.sat": 0.102803738317757, "success_rate.epoch.env.science": 0.7160273565269105, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4841378308056553, "success_rate.epoch.global": 0.6637253021292921, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9943310657596371, "tokens_p.mean_in_band": 0.6790916313559322, "tokens_rate.above_band": 0.882, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.118 }, { "epoch": 0.5393949468085106, "grad_norm": 117.5764329206785, "learning_rate": 1.9393242725572005e-07, "loss": 0.675, "step": 3245, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46774193548387094, "success_rate.epoch.env.logic": 0.4331103678929766, "success_rate.epoch.env.math": 0.8762035763411279, "success_rate.epoch.env.sat": 0.10232558139534884, "success_rate.epoch.env.science": 0.7163647163647163, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48589892514727634, "success_rate.epoch.global": 0.6640490327523463, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973137535816619, "tokens_p.mean_in_band": 0.6428571428571429, "tokens_rate.above_band": 0.9432432432432433, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05675675675675676 }, { "epoch": 0.5402260638297872, "grad_norm": 116.71089826451538, "learning_rate": 1.9391329946254788e-07, "loss": 0.582, "step": 3250, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46774193548387094, "success_rate.epoch.env.logic": 0.4331103678929766, "success_rate.epoch.env.math": 0.8765432098765432, "success_rate.epoch.env.sat": 0.10232558139534884, "success_rate.epoch.env.science": 0.7158956109134045, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48588715497310386, "success_rate.epoch.global": 0.6639250191277736, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9881889763779528, "tokens_p.mean_below_band": 1.525040715932846e-08, "tokens_p.mean_in_band": 0.58984375, "tokens_rate.above_band": 0.8881118881118881, "tokens_rate.below_band": 0.006993006993006993, "tokens_rate.in_band": 0.1048951048951049 }, { "epoch": 0.5410571808510638, "grad_norm": 87.75839838589815, "learning_rate": 1.9389414697667777e-07, "loss": 0.7757, "step": 3255, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46774193548387094, "success_rate.epoch.env.logic": 0.43333333333333335, "success_rate.epoch.env.math": 0.8768809849521204, "success_rate.epoch.env.sat": 0.10185185185185185, "success_rate.epoch.env.science": 0.7161481481481482, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4859180229919385, "success_rate.epoch.global": 0.6640565317035906, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9939018777614138, "tokens_p.mean_in_band": 0.5779109589041096, "tokens_rate.above_band": 0.8611287254280279, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1388712745719721 }, { "epoch": 0.5418882978723404, "grad_norm": 117.2183827307503, "learning_rate": 1.938749698321007e-07, "loss": 0.6221, "step": 3260, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.42063492063492064, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46774193548387094, "success_rate.epoch.env.logic": 0.43333333333333335, "success_rate.epoch.env.math": 0.8770491803278688, "success_rate.epoch.env.sat": 0.10185185185185185, "success_rate.epoch.env.science": 0.7161042036708112, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48592931852815774, "success_rate.epoch.global": 0.6641221374045801, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9831349206349206, "tokens_p.mean_in_band": 0.5756655092592593, "tokens_rate.above_band": 0.8235294117647058, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17647058823529413 }, { "epoch": 0.542719414893617, "grad_norm": 98.40761766824093, "learning_rate": 1.9385576806285137e-07, "loss": 0.8351, "step": 3265, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.4251968503937008, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46774193548387094, "success_rate.epoch.env.logic": 0.43333333333333335, "success_rate.epoch.env.math": 0.8770491803278688, "success_rate.epoch.env.sat": 0.10185185185185185, "success_rate.epoch.env.science": 0.7161442933175636, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48634768392866073, "success_rate.epoch.global": 0.6642516682554814, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950211864406779, "tokens_p.mean_below_band": 1.8189894035458565e-09, "tokens_p.mean_in_band": 0.7270972842261905, "tokens_rate.above_band": 0.9305993690851735, "tokens_rate.below_band": 0.0031545741324921135, "tokens_rate.in_band": 0.06624605678233439 }, { "epoch": 0.5435505319148937, "grad_norm": 393.78041537958535, "learning_rate": 1.9383654170300825e-07, "loss": 0.641, "step": 3270, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.4251968503937008, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46774193548387094, "success_rate.epoch.env.logic": 0.43261231281198004, "success_rate.epoch.env.math": 0.8772169167803547, "success_rate.epoch.env.sat": 0.10185185185185185, "success_rate.epoch.env.science": 0.7163518299881936, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48631625234700276, "success_rate.epoch.global": 0.664382257757472, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982727465986394, "tokens_p.mean_in_band": 0.630859375, "tokens_rate.above_band": 0.9735099337748344, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026490066225165563 }, { "epoch": 0.5443816489361702, "grad_norm": 125.13192048028029, "learning_rate": 1.938172907866934e-07, "loss": 0.5023, "step": 3275, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.4251968503937008, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.464, "success_rate.epoch.env.logic": 0.43283582089552236, "success_rate.epoch.env.math": 0.8772169167803547, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7166863207547169, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48598413416460323, "success_rate.epoch.global": 0.6643223721725907, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9917733089579525, "tokens_p.mean_in_band": 0.6948275862068966, "tokens_rate.above_band": 0.8829701372074253, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11702986279257466 }, { "epoch": 0.5452127659574468, "grad_norm": 176.9919449863871, "learning_rate": 1.937980153480725e-07, "loss": 0.6543, "step": 3280, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.4251968503937008, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.464, "success_rate.epoch.env.logic": 0.43211920529801323, "success_rate.epoch.env.math": 0.8772169167803547, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7170588235294117, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.485952851180711, "success_rate.epoch.global": 0.6645161290322581, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9940591190108191, "tokens_p.mean_in_band": 0.65640625, "tokens_rate.above_band": 0.9627976190476191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03720238095238095 }, { "epoch": 0.5460438829787234, "grad_norm": 278.81311177224, "learning_rate": 1.9377871542135472e-07, "loss": 0.6573, "step": 3285, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.4251968503937008, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.464, "success_rate.epoch.env.logic": 0.43211920529801323, "success_rate.epoch.env.math": 0.8772169167803547, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7170975323149236, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4859563701612121, "success_rate.epoch.global": 0.6645809632157755, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.990516571724495, "tokens_p.mean_in_band": 0.5702287467838765, "tokens_rate.above_band": 0.8688413948256468, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1311586051743532 }, { "epoch": 0.546875, "grad_norm": 124.55345609261471, "learning_rate": 1.9375939104079268e-07, "loss": 0.5882, "step": 3290, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.4296875, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.464, "success_rate.epoch.env.logic": 0.4306930693069307, "success_rate.epoch.env.math": 0.8775510204081632, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7171806167400882, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.486272888494684, "success_rate.epoch.global": 0.6645833333333333, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975209243085881, "tokens_p.mean_in_band": 0.4116053427419355, "tokens_rate.above_band": 0.9779359430604982, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02206405693950178 }, { "epoch": 0.5477061170212766, "grad_norm": 201.7107469896585, "learning_rate": 1.9374004224068246e-07, "loss": 0.7538, "step": 3295, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.43410852713178294, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.464, "success_rate.epoch.env.logic": 0.4306930693069307, "success_rate.epoch.env.math": 0.8778833107191316, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7173466392720869, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4867201012196612, "success_rate.epoch.global": 0.6649006622516557, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965431415929203, "tokens_p.mean_in_band": 0.6985193888346354, "tokens_rate.above_band": 0.9377593360995851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06224066390041494 }, { "epoch": 0.5485372340425532, "grad_norm": 117.16934693105219, "learning_rate": 1.937206690553634e-07, "loss": 0.4866, "step": 3300, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.43410852713178294, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.464, "success_rate.epoch.env.logic": 0.4299835255354201, "success_rate.epoch.env.math": 0.878213802435724, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7170917619466433, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.486662471275992, "success_rate.epoch.global": 0.6647770219198791, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.997624269005848, "tokens_p.mean_in_band": 0.5042818509615384, "tokens_rate.above_band": 0.9293478260869565, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07065217391304347 }, { "epoch": 0.5493683510638298, "grad_norm": 167.2840586285636, "learning_rate": 1.9370127151921817e-07, "loss": 0.7433, "step": 3305, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.43410852713178294, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4603174603174603, "success_rate.epoch.env.logic": 0.4299835255354201, "success_rate.epoch.env.math": 0.8783783783783784, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7173403632103105, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48636525650542667, "success_rate.epoch.global": 0.664904663016802, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9896875, "tokens_p.mean_in_band": 0.6246939804147466, "tokens_rate.above_band": 0.8216926869350862, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17830731306491374 }, { "epoch": 0.5501994680851063, "grad_norm": 197.12579536218755, "learning_rate": 1.936818496666727e-07, "loss": 0.5641, "step": 3310, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.43410852713178294, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4299835255354201, "success_rate.epoch.env.math": 0.8783783783783784, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7178362573099415, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48679665313415654, "success_rate.epoch.global": 0.6653469079939668, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9896599264705882, "tokens_p.mean_in_band": 0.7979910714285714, "tokens_rate.above_band": 0.9872958257713249, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012704174228675136 }, { "epoch": 0.551030585106383, "grad_norm": 114.70664501060222, "learning_rate": 1.9366240353219595e-07, "loss": 0.6868, "step": 3315, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.4307692307692308, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4309210526315789, "success_rate.epoch.env.math": 0.8787061994609164, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7181658878504673, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4866380788029448, "success_rate.epoch.global": 0.6656626506024096, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996684350132626, "tokens_p.mean_in_band": 0.7360026041666666, "tokens_rate.above_band": 0.9843342036553525, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015665796344647518 }, { "epoch": 0.5518617021276596, "grad_norm": 111.47568696410329, "learning_rate": 1.9364293315030004e-07, "loss": 0.5449, "step": 3320, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.4307692307692308, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4320785597381342, "success_rate.epoch.env.math": 0.8787061994609164, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7184947491248541, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48677320320121226, "success_rate.epoch.global": 0.6659146456100771, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9995216836734694, "tokens_p.mean_in_band": 0.52294921875, "tokens_rate.above_band": 0.9702970297029703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0297029702970297 }, { "epoch": 0.5526928191489362, "grad_norm": 85.21180144163797, "learning_rate": 1.9362343855554018e-07, "loss": 0.5307, "step": 3325, "success_rate.epoch.env.abd": 0.4594594594594595, "success_rate.epoch.env.agentgym:alfworld": 0.4307692307692308, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4320785597381342, "success_rate.epoch.env.math": 0.8787061994609164, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7186953989516599, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48679144409455827, "success_rate.epoch.global": 0.6661032863849765, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9920908658420552, "tokens_p.mean_below_band": 2.9976945370435715e-09, "tokens_p.mean_in_band": 0.7104223901098901, "tokens_rate.above_band": 0.9195100612423447, "tokens_rate.below_band": 0.0008748906386701663, "tokens_rate.in_band": 0.07961504811898512 }, { "epoch": 0.5535239361702128, "grad_norm": 130.87655122855932, "learning_rate": 1.9360391978251446e-07, "loss": 0.5872, "step": 3330, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4351145038167939, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.43300653594771243, "success_rate.epoch.env.math": 0.8788694481830417, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7189409368635438, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4874969930009358, "success_rate.epoch.global": 0.66641665104069, "success_rate.window.env.abd": 0.5, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973012111637704, "tokens_p.mean_in_band": 0.1284136446156571, "tokens_rate.above_band": 0.40216010165184246, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.5978398983481575 }, { "epoch": 0.5543550531914894, "grad_norm": 88.45651137642601, "learning_rate": 1.9358437686586398e-07, "loss": 0.4208, "step": 3335, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4351145038167939, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.433931484502447, "success_rate.epoch.env.math": 0.8793565683646113, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7187318208260617, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48760635233719235, "success_rate.epoch.global": 0.6665417759460472, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993140243902439, "tokens_p.mean_in_band": 0.6883680555555556, "tokens_rate.above_band": 0.9647058823529412, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03529411764705882 }, { "epoch": 0.555186170212766, "grad_norm": 132.64204714576977, "learning_rate": 1.9356480984027253e-07, "loss": 0.407, "step": 3340, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4351145038167939, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.433931484502447, "success_rate.epoch.env.math": 0.8793565683646113, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7193033381712627, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48765830845948327, "success_rate.epoch.global": 0.6669784845650141, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9902836134453782, "tokens_p.mean_in_band": 0.7677083333333333, "tokens_rate.above_band": 0.8880597014925373, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11194029850746269 }, { "epoch": 0.5560172872340425, "grad_norm": 124.2666115329214, "learning_rate": 1.9354521874046689e-07, "loss": 0.5664, "step": 3345, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4351145038167939, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.433931484502447, "success_rate.epoch.env.math": 0.8798397863818425, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7197913648217907, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4877466034292796, "success_rate.epoch.global": 0.6675382891296228, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9945255474452555, "tokens_p.mean_in_band": 0.8627232142857143, "tokens_rate.above_band": 0.9513888888888888, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04861111111111111 }, { "epoch": 0.5568484042553191, "grad_norm": 108.21322101755437, "learning_rate": 1.9352560360121636e-07, "loss": 0.6237, "step": 3350, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4351145038167939, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.433931484502447, "success_rate.epoch.env.math": 0.8801597869507324, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7197452229299363, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48777149967264644, "success_rate.epoch.global": 0.6676618772158985, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9998031496062992, "tokens_p.mean_in_band": 0.5998114224137931, "tokens_rate.above_band": 0.9563253012048193, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043674698795180725 }, { "epoch": 0.5576795212765957, "grad_norm": 64.96612927227478, "learning_rate": 1.935059644573331e-07, "loss": 0.5518, "step": 3355, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4351145038167939, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.43322475570032576, "success_rate.epoch.env.math": 0.8801597869507324, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7199074074074074, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48772199564313284, "success_rate.epoch.global": 0.6676613204028348, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976851851851852, "tokens_p.mean_in_band": 0.6111328125, "tokens_rate.above_band": 0.9440559440559441, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055944055944055944 }, { "epoch": 0.5585106382978723, "grad_norm": 306.3571322516184, "learning_rate": 1.9348630134367173e-07, "loss": 0.6717, "step": 3360, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4351145038167939, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.43344155844155846, "success_rate.epoch.env.math": 0.8801597869507324, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.7201503324660307, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48776378907948337, "success_rate.epoch.global": 0.6677846096515745, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995844414893617, "tokens_p.mean_in_band": 0.5966996173469388, "tokens_rate.above_band": 0.9388264669163545, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06117353308364544 }, { "epoch": 0.559341755319149, "grad_norm": 97.70276225776648, "learning_rate": 1.9346661429512946e-07, "loss": 0.4693, "step": 3365, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4351145038167939, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.43344155844155846, "success_rate.epoch.env.math": 0.8804780876494024, "success_rate.epoch.env.sat": 0.1004566210045662, "success_rate.epoch.env.science": 0.7201848108576379, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4877116901354453, "success_rate.epoch.global": 0.6677209302325582, "success_rate.window.env.abd": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9966216216216216, "tokens_p.mean_below_band": 9.74978320300579e-10, "tokens_p.mean_in_band": 0.58798828125, "tokens_rate.above_band": 0.844106463878327, "tokens_rate.below_band": 0.0038022813688212928, "tokens_rate.in_band": 0.1520912547528517 }, { "epoch": 0.5601728723404256, "grad_norm": 137.23799338425331, "learning_rate": 1.9344690334664603e-07, "loss": 0.6915, "step": 3370, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.43344155844155846, "success_rate.epoch.env.math": 0.8807947019867549, "success_rate.epoch.env.sat": 0.1004566210045662, "success_rate.epoch.env.science": 0.7203463203463204, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4874554903015746, "success_rate.epoch.global": 0.6678438661710037, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.32, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9976302941922394, "tokens_p.mean_in_band": 0.4926228350903614, "tokens_rate.above_band": 0.959600876125578, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040399123874422 }, { "epoch": 0.5610039893617021, "grad_norm": 111.91847726305173, "learning_rate": 1.9342716853320343e-07, "loss": 0.5506, "step": 3375, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4336569579288026, "success_rate.epoch.env.math": 0.8811096433289299, "success_rate.epoch.env.sat": 0.1004566210045662, "success_rate.epoch.env.science": 0.7204610951008645, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.487514137172844, "success_rate.epoch.global": 0.6680274633512711, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965679190751445, "tokens_p.mean_in_band": 0.533797554347826, "tokens_rate.above_band": 0.9376693766937669, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06233062330623306 }, { "epoch": 0.5618351063829787, "grad_norm": 67.75984462274953, "learning_rate": 1.9340740988982625e-07, "loss": 0.6136, "step": 3380, "success_rate.epoch.env.abd": 0.46153846153846156, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4345718901453958, "success_rate.epoch.env.math": 0.8811096433289299, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7202877697841726, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4875400449815108, "success_rate.epoch.global": 0.6679021497405485, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.996171875, "tokens_p.mean_in_band": 0.6792879971590909, "tokens_rate.above_band": 0.9009009009009009, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0990990990990991 }, { "epoch": 0.5626662234042553, "grad_norm": 156.51334171989987, "learning_rate": 1.9338762745158114e-07, "loss": 0.6024, "step": 3385, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4345718901453958, "success_rate.epoch.env.math": 0.8812664907651715, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7206093705087668, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48653458921990844, "success_rate.epoch.global": 0.6680858941132913, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5666666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9924273574561403, "tokens_p.mean_in_band": 0.6706665917266187, "tokens_rate.above_band": 0.8677450047573739, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13225499524262607 }, { "epoch": 0.5634973404255319, "grad_norm": 142.80790309139422, "learning_rate": 1.9336782125357715e-07, "loss": 0.6905, "step": 3390, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4338709677419355, "success_rate.epoch.env.math": 0.8814229249011858, "success_rate.epoch.env.sat": 0.09954751131221719, "success_rate.epoch.env.science": 0.7206896551724138, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4864512535570374, "success_rate.epoch.global": 0.6679615242323345, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9947773972602739, "tokens_p.mean_in_band": 0.49404761904761907, "tokens_rate.above_band": 0.9205548549810845, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07944514501891552 }, { "epoch": 0.5643284574468085, "grad_norm": 110.51663173220152, "learning_rate": 1.933479913309654e-07, "loss": 0.6003, "step": 3395, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4333868378812199, "success_rate.epoch.env.math": 0.881578947368421, "success_rate.epoch.env.sat": 0.09954751131221719, "success_rate.epoch.env.science": 0.7209302325581395, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48644329628360516, "success_rate.epoch.global": 0.6680214298910031, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0000704887218046, "tokens_p.mean_in_band": 0.5375532670454546, "tokens_rate.above_band": 0.9679767103347889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03202328966521106 }, { "epoch": 0.5651595744680851, "grad_norm": 102.08720236173353, "learning_rate": 1.9332813771893914e-07, "loss": 0.5505, "step": 3400, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4645669291338583, "success_rate.epoch.env.logic": 0.4333868378812199, "success_rate.epoch.env.math": 0.8817345597897503, "success_rate.epoch.env.sat": 0.09954751131221719, "success_rate.epoch.env.science": 0.7210435779816514, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48646774699677253, "success_rate.epoch.global": 0.6682044657685919, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9925508720930233, "tokens_p.mean_in_band": 0.73486328125, "tokens_rate.above_band": 0.8775510204081632, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12244897959183673 }, { "epoch": 0.5659906914893617, "grad_norm": 190.68819095292852, "learning_rate": 1.933082604527337e-07, "loss": 0.4331, "step": 3405, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.4333868378812199, "success_rate.epoch.env.math": 0.8817345597897503, "success_rate.epoch.env.sat": 0.09954751131221719, "success_rate.epoch.env.science": 0.7215226101888953, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48689157454889853, "success_rate.epoch.global": 0.6686325101363804, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989992721979621, "tokens_p.mean_in_band": 0.61953125, "tokens_rate.above_band": 0.9717114568599717, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028288543140028287 }, { "epoch": 0.5668218085106383, "grad_norm": 103.43171487259264, "learning_rate": 1.932883595676263e-07, "loss": 0.6614, "step": 3410, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.4333868378812199, "success_rate.epoch.env.math": 0.8817345597897503, "success_rate.epoch.env.sat": 0.0990990990990991, "success_rate.epoch.env.science": 0.7217615098655991, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.486872527954679, "success_rate.epoch.global": 0.6686924493554328, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9904698581560284, "tokens_p.mean_in_band": 0.69484375, "tokens_rate.above_band": 0.8493975903614458, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15060240963855423 }, { "epoch": 0.5676529255319149, "grad_norm": 208.40341949456763, "learning_rate": 1.9326843509893617e-07, "loss": 0.4698, "step": 3415, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.4333868378812199, "success_rate.epoch.env.math": 0.8820445609436435, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.7220794058840332, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4865588721058767, "success_rate.epoch.global": 0.6688120632585509, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9868259803921569, "tokens_p.mean_in_band": 0.5959795947488584, "tokens_rate.above_band": 0.7884057971014493, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21159420289855072 }, { "epoch": 0.5684840425531915, "grad_norm": 123.54851256069378, "learning_rate": 1.932484870820244e-07, "loss": 0.3822, "step": 3420, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46511627906976744, "success_rate.epoch.env.logic": 0.4333868378812199, "success_rate.epoch.env.math": 0.8821989528795812, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.7219846022241232, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4865642892218792, "success_rate.epoch.global": 0.668870523415978, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9903474903474904, "tokens_p.mean_in_band": 0.6338975694444444, "tokens_rate.above_band": 0.9664179104477612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033582089552238806 }, { "epoch": 0.5693151595744681, "grad_norm": 431.58645055561846, "learning_rate": 1.9322851555229373e-07, "loss": 0.601, "step": 3425, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.4342948717948718, "success_rate.epoch.env.math": 0.8821989528795812, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.7223804100227791, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48635756414742487, "success_rate.epoch.global": 0.669112252384446, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9900081566068516, "tokens_p.mean_in_band": 0.664296875, "tokens_rate.above_band": 0.8909883720930233, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10901162790697674 }, { "epoch": 0.5701462765957447, "grad_norm": 115.4163781656245, "learning_rate": 1.9320852054518884e-07, "loss": 0.615, "step": 3430, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.4352, "success_rate.epoch.env.math": 0.8821989528795812, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.7226173541963016, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48646138890912033, "success_rate.epoch.global": 0.6693548387096774, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9948593073593074, "tokens_p.mean_in_band": 0.64171875, "tokens_rate.above_band": 0.90234375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09765625 }, { "epoch": 0.5709773936170213, "grad_norm": 217.45249092490982, "learning_rate": 1.931885020961959e-07, "loss": 0.5385, "step": 3435, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.43610223642172524, "success_rate.epoch.env.math": 0.8821989528795812, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.7227272727272728, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4865534029957292, "success_rate.epoch.global": 0.6695349688758696, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968251088534107, "tokens_p.mean_in_band": 0.6231026785714285, "tokens_rate.above_band": 0.9516574585635359, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04834254143646409 }, { "epoch": 0.5718085106382979, "grad_norm": 94.78379157521786, "learning_rate": 1.9316846024084278e-07, "loss": 0.6162, "step": 3440, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.43610223642172524, "success_rate.epoch.env.math": 0.8821989528795812, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.7227105188545506, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4865518799163908, "success_rate.epoch.global": 0.6695922472115561, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995872641509434, "tokens_p.mean_in_band": 0.6285807291666666, "tokens_rate.above_band": 0.9298245614035088, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07017543859649122 }, { "epoch": 0.5726396276595744, "grad_norm": 136.5194251574622, "learning_rate": 1.9314839501469886e-07, "loss": 0.685, "step": 3445, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.43610223642172524, "success_rate.epoch.env.math": 0.8821989528795812, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7229461756373937, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48653326482338616, "success_rate.epoch.global": 0.6696510140690664, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977341389728097, "tokens_p.mean_in_band": 0.580109126984127, "tokens_rate.above_band": 0.9131034482758621, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08689655172413793 }, { "epoch": 0.573470744680851, "grad_norm": 105.46169193868766, "learning_rate": 1.93128306453375e-07, "loss": 0.8601, "step": 3450, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.43610223642172524, "success_rate.epoch.env.math": 0.8825065274151436, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7233380480905234, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48659685091326726, "success_rate.epoch.global": 0.67007299270073, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9887867647058823, "tokens_p.mean_in_band": 0.7763671875, "tokens_rate.above_band": 0.8947368421052632, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10526315789473684 }, { "epoch": 0.5743018617021277, "grad_norm": 101.24033812462726, "learning_rate": 1.931081945925235e-07, "loss": 0.458, "step": 3455, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.25, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.43610223642172524, "success_rate.epoch.env.math": 0.8825065274151436, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7238068342276193, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4866394678348214, "success_rate.epoch.global": 0.6704338315712723, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988755622188905, "tokens_p.mean_in_band": 0.5683001893939394, "tokens_rate.above_band": 0.9528571428571428, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047142857142857146 }, { "epoch": 0.5751329787234043, "grad_norm": 118.34969737607743, "learning_rate": 1.9308805946783794e-07, "loss": 0.5792, "step": 3460, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.3076923076923077, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46564885496183206, "success_rate.epoch.env.logic": 0.4354066985645933, "success_rate.epoch.env.math": 0.8826597131681877, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7241184767277856, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4922369216086175, "success_rate.epoch.global": 0.6707317073170732, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987827145465612, "tokens_p.mean_in_band": 0.6183035714285714, "tokens_rate.above_band": 0.9750741839762611, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024925816023738872 }, { "epoch": 0.5759640957446809, "grad_norm": 128.50248204797833, "learning_rate": 1.9306790111505326e-07, "loss": 0.7503, "step": 3465, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.3076923076923077, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46564885496183206, "success_rate.epoch.env.logic": 0.4354066985645933, "success_rate.epoch.env.math": 0.8828125, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7243517474633596, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4922720177511071, "success_rate.epoch.global": 0.6709712622771917, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9971802566225165, "tokens_p.mean_in_band": 0.5270432692307693, "tokens_rate.above_band": 0.9489395129615082, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05106048703849175 }, { "epoch": 0.5767952127659575, "grad_norm": 124.11961737165471, "learning_rate": 1.9304771956994556e-07, "loss": 0.6193, "step": 3470, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.3076923076923077, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46564885496183206, "success_rate.epoch.env.logic": 0.43630573248407645, "success_rate.epoch.env.math": 0.8828125, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7248945147679325, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4924030905896577, "success_rate.epoch.global": 0.6714493280058118, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.98765625, "tokens_p.mean_in_band": 0.71640625, "tokens_rate.above_band": 0.9090909090909091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09090909090909091 }, { "epoch": 0.577626329787234, "grad_norm": 80.91745340051162, "learning_rate": 1.930275148683322e-07, "loss": 0.4208, "step": 3475, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.3076923076923077, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46564885496183206, "success_rate.epoch.env.logic": 0.43720190779014306, "success_rate.epoch.env.math": 0.88296488946684, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7248454187745925, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4924939513878001, "success_rate.epoch.global": 0.6715659589911087, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.575, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9953520569620253, "tokens_p.mean_in_band": 0.6641927083333333, "tokens_rate.above_band": 0.9132947976878613, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08670520231213873 }, { "epoch": 0.5784574468085106, "grad_norm": 138.64385863583678, "learning_rate": 1.9300728704607154e-07, "loss": 0.6964, "step": 3480, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4621212121212121, "success_rate.epoch.env.logic": 0.43720190779014306, "success_rate.epoch.env.math": 0.88296488946684, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7253856942496494, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4902243705383833, "success_rate.epoch.global": 0.6717391304347826, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9919088866338069, "tokens_p.mean_in_band": 0.5242947048611111, "tokens_rate.above_band": 0.8272967138402495, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17270328615975053 }, { "epoch": 0.5792885638297872, "grad_norm": 139.8773584252811, "learning_rate": 1.9298703613906301e-07, "loss": 0.6539, "step": 3485, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4621212121212121, "success_rate.epoch.env.logic": 0.43670886075949367, "success_rate.epoch.env.math": 0.8831168831168831, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7253363228699552, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.490188877378356, "success_rate.epoch.global": 0.6716120861226705, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7111111111111111, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.996934253915911, "tokens_p.mean_in_band": 0.5658318014705882, "tokens_rate.above_band": 0.9596518987341772, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040348101265822785 }, { "epoch": 0.5801196808510638, "grad_norm": 103.1866269871044, "learning_rate": 1.9296676218324702e-07, "loss": 0.562, "step": 3490, "success_rate.epoch.env.abd": 0.45, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4621212121212121, "success_rate.epoch.env.logic": 0.43601895734597157, "success_rate.epoch.env.math": 0.8835705045278137, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7255670680481657, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.490188374030685, "success_rate.epoch.global": 0.6718467654499458, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976766748366013, "tokens_p.mean_in_band": 0.5767045454545454, "tokens_rate.above_band": 0.9737470167064439, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026252983293556086 }, { "epoch": 0.5809507978723404, "grad_norm": 106.91551104646285, "learning_rate": 1.9294646521460488e-07, "loss": 0.6432, "step": 3495, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4318181818181818, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4621212121212121, "success_rate.epoch.env.logic": 0.43601895734597157, "success_rate.epoch.env.math": 0.884020618556701, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7257974258533856, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48925245240141313, "success_rate.epoch.global": 0.6720808518317993, "success_rate.window.env.abd": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9864296636085627, "tokens_p.mean_in_band": 0.7404364224137931, "tokens_rate.above_band": 0.8493506493506493, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15064935064935064 }, { "epoch": 0.581781914893617, "grad_norm": 112.86493227324142, "learning_rate": 1.9292614526915868e-07, "loss": 0.5273, "step": 3500, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43609022556390975, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4621212121212121, "success_rate.epoch.env.logic": 0.43601895734597157, "success_rate.epoch.env.math": 0.8841698841698842, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7263334264172019, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4897031169398429, "success_rate.epoch.global": 0.6726126126126126, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960171568627451, "tokens_p.mean_in_band": 0.73515625, "tokens_rate.above_band": 0.9878934624697336, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012106537530266344 }, { "epoch": 0.5826130319148937, "grad_norm": 144.12948655184826, "learning_rate": 1.9290580238297146e-07, "loss": 0.6722, "step": 3505, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43609022556390975, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4621212121212121, "success_rate.epoch.env.logic": 0.43601895734597157, "success_rate.epoch.env.math": 0.884318766066838, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.7263861799944274, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48972144743749557, "success_rate.epoch.global": 0.672782874617737, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9891129032258065, "tokens_p.mean_in_band": 0.6759868421052632, "tokens_rate.above_band": 0.8908045977011494, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10919540229885058 }, { "epoch": 0.5834441489361702, "grad_norm": 153.49025215579306, "learning_rate": 1.9288543659214672e-07, "loss": 0.6542, "step": 3510, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43609022556390975, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4621212121212121, "success_rate.epoch.env.logic": 0.4353312302839117, "success_rate.epoch.env.math": 0.883183568677792, "success_rate.epoch.env.sat": 0.09777777777777778, "success_rate.epoch.env.science": 0.7264124686891177, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.48951843437450254, "success_rate.epoch.global": 0.6724757455982753, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.21428571428571427, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9941964285714285, "tokens_p.mean_in_band": 0.603125, "tokens_rate.above_band": 0.9333333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06666666666666667 }, { "epoch": 0.5842752659574468, "grad_norm": 170.75337095885664, "learning_rate": 1.9286504793282885e-07, "loss": 0.7141, "step": 3515, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43609022556390975, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46616541353383456, "success_rate.epoch.env.logic": 0.43622047244094486, "success_rate.epoch.env.math": 0.8833333333333333, "success_rate.epoch.env.sat": 0.09777777777777778, "success_rate.epoch.env.science": 0.7265646731571628, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4899943809829791, "success_rate.epoch.global": 0.6727697002333513, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947587025316456, "tokens_p.mean_in_band": 0.7180347711267606, "tokens_rate.above_band": 0.9468164794007491, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05318352059925094 }, { "epoch": 0.5851063829787234, "grad_norm": 44.56486602665988, "learning_rate": 1.928446364412027e-07, "loss": 0.5004, "step": 3520, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43609022556390975, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46616541353383456, "success_rate.epoch.env.logic": 0.43622047244094486, "success_rate.epoch.env.math": 0.8836317135549873, "success_rate.epoch.env.sat": 0.10176991150442478, "success_rate.epoch.env.science": 0.7270202721466259, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4904258457955031, "success_rate.epoch.global": 0.6732974910394265, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9877485795454546, "tokens_p.mean_below_band": 5.20230969414115e-10, "tokens_p.mean_in_band": 0.8055555555555556, "tokens_rate.above_band": 0.946236559139785, "tokens_rate.below_band": 0.005376344086021506, "tokens_rate.in_band": 0.04838709677419355 }, { "epoch": 0.5859375, "grad_norm": 161.7931973496787, "learning_rate": 1.9282420215349364e-07, "loss": 0.6611, "step": 3525, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43609022556390975, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46616541353383456, "success_rate.epoch.env.logic": 0.43622047244094486, "success_rate.epoch.env.math": 0.8839285714285714, "success_rate.epoch.env.sat": 0.10176991150442478, "success_rate.epoch.env.science": 0.7270457697642164, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49045515084015534, "success_rate.epoch.global": 0.673469387755102, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9953871587030717, "tokens_p.mean_in_band": 0.5174005681818182, "tokens_rate.above_band": 0.9551752241238793, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04482477587612062 }, { "epoch": 0.5867686170212766, "grad_norm": 137.6124261657743, "learning_rate": 1.9280374510596754e-07, "loss": 0.534, "step": 3530, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43609022556390975, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46616541353383456, "success_rate.epoch.env.logic": 0.43622047244094486, "success_rate.epoch.env.math": 0.8840764331210191, "success_rate.epoch.env.sat": 0.10176991150442478, "success_rate.epoch.env.science": 0.7270210409745294, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4904663447404063, "success_rate.epoch.global": 0.6735788344654987, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9962908011869436, "tokens_p.mean_in_band": 0.6245404411764706, "tokens_rate.above_band": 0.9519774011299436, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0480225988700565 }, { "epoch": 0.5875997340425532, "grad_norm": 113.38972715269938, "learning_rate": 1.9278326533493067e-07, "loss": 0.5121, "step": 3535, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43609022556390975, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4626865671641791, "success_rate.epoch.env.logic": 0.43622047244094486, "success_rate.epoch.env.math": 0.8840764331210191, "success_rate.epoch.env.sat": 0.1013215859030837, "success_rate.epoch.env.science": 0.7268454520320707, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49009336647554674, "success_rate.epoch.global": 0.6732726298875201, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.17857142857142858, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9933806960440661, "tokens_p.mean_below_band": 2.5331974029541016e-07, "tokens_p.mean_in_band": 0.5725612017804155, "tokens_rate.above_band": 0.8552462526766595, "tokens_rate.below_band": 0.00042826552462526765, "tokens_rate.in_band": 0.1443254817987152 }, { "epoch": 0.5884308510638298, "grad_norm": 75.89376074218096, "learning_rate": 1.9276276287672958e-07, "loss": 0.6275, "step": 3540, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.44029850746268656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4626865671641791, "success_rate.epoch.env.logic": 0.4371069182389937, "success_rate.epoch.env.math": 0.8840764331210191, "success_rate.epoch.env.sat": 0.1013215859030837, "success_rate.epoch.env.science": 0.7272225289895086, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.490590803262298, "success_rate.epoch.global": 0.6736804564907275, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9939304461942258, "tokens_p.mean_in_band": 0.7470703125, "tokens_rate.above_band": 0.9407407407407408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05925925925925926 }, { "epoch": 0.5892619680851063, "grad_norm": 165.31117024930202, "learning_rate": 1.9274223776775115e-07, "loss": 0.5662, "step": 3545, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.44029850746268656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4626865671641791, "success_rate.epoch.env.logic": 0.4371069182389937, "success_rate.epoch.env.math": 0.8840764331210191, "success_rate.epoch.env.sat": 0.10087719298245613, "success_rate.epoch.env.science": 0.727448275862069, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49057092634883737, "success_rate.epoch.global": 0.6737348538845331, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.2708333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9960091538289797, "tokens_p.mean_below_band": 2.632538477579753e-07, "tokens_p.mean_in_band": 0.5725049407114624, "tokens_rate.above_band": 0.9459345300950369, "tokens_rate.below_band": 0.0006335797254487857, "tokens_rate.in_band": 0.05343189017951425 }, { "epoch": 0.590093085106383, "grad_norm": 72.27471814405673, "learning_rate": 1.9272169004442236e-07, "loss": 0.6105, "step": 3550, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.44029850746268656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4626865671641791, "success_rate.epoch.env.logic": 0.438871473354232, "success_rate.epoch.env.math": 0.8842239185750637, "success_rate.epoch.env.sat": 0.10043668122270742, "success_rate.epoch.env.science": 0.7277486910994765, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49073201217128654, "success_rate.epoch.global": 0.6740213523131673, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99619140625, "tokens_p.mean_in_band": 0.5677083333333334, "tokens_rate.above_band": 0.898876404494382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10112359550561797 }, { "epoch": 0.5909242021276596, "grad_norm": 371.0554446926751, "learning_rate": 1.927011197432104e-07, "loss": 0.7198, "step": 3555, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.44029850746268656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4626865671641791, "success_rate.epoch.env.logic": 0.438871473354232, "success_rate.epoch.env.math": 0.8842239185750637, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.7280484448114506, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4911148211327614, "success_rate.epoch.global": 0.6743111111111111, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964387464387464, "tokens_p.mean_in_band": 0.669921875, "tokens_rate.above_band": 0.9564032697547684, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043596730245231606 }, { "epoch": 0.5917553191489362, "grad_norm": 161.67648638252908, "learning_rate": 1.9268052690062256e-07, "loss": 0.5927, "step": 3560, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43703703703703706, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4626865671641791, "success_rate.epoch.env.logic": 0.440625, "success_rate.epoch.env.math": 0.8845177664974619, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.727997799779978, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4909998446883108, "success_rate.epoch.global": 0.6744186046511628, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9962912087912088, "tokens_p.mean_in_band": 0.7169744318181818, "tokens_rate.above_band": 0.9763948497854077, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023605150214592276 }, { "epoch": 0.5925864361702128, "grad_norm": 146.5230316945275, "learning_rate": 1.9265991155320603e-07, "loss": 0.5661, "step": 3565, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43703703703703706, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4626865671641791, "success_rate.epoch.env.logic": 0.440809968847352, "success_rate.epoch.env.math": 0.8846641318124208, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.7279472382522671, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49102536947327446, "success_rate.epoch.global": 0.6744103564461784, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964041095890411, "tokens_p.mean_in_band": 0.60875, "tokens_rate.above_band": 0.9668874172185431, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033112582781456956 }, { "epoch": 0.5934175531914894, "grad_norm": 79.66714075094525, "learning_rate": 1.9263927373754806e-07, "loss": 0.5814, "step": 3570, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43703703703703706, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4626865671641791, "success_rate.epoch.env.logic": 0.4409937888198758, "success_rate.epoch.env.math": 0.8846641318124208, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.728171334431631, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4910624527598097, "success_rate.epoch.global": 0.6745216158752658, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975215517241379, "tokens_p.mean_in_band": 0.6535073138297872, "tokens_rate.above_band": 0.9610604805302403, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03893951946975974 }, { "epoch": 0.594248670212766, "grad_norm": 152.75395104851907, "learning_rate": 1.9261861349027565e-07, "loss": 0.6894, "step": 3575, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43703703703703706, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45925925925925926, "success_rate.epoch.env.logic": 0.44031007751937984, "success_rate.epoch.env.math": 0.8846641318124208, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.7286184210526315, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49072936797940836, "success_rate.epoch.global": 0.6746284501061571, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9930287263210369, "tokens_p.mean_in_band": 0.5168019923580786, "tokens_rate.above_band": 0.8975391498881432, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10246085011185682 }, { "epoch": 0.5950797872340425, "grad_norm": 96.2735204423041, "learning_rate": 1.9259793084805577e-07, "loss": 0.6407, "step": 3580, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4338235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.44049459041731065, "success_rate.epoch.env.math": 0.8848101265822785, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.7284931506849315, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49014889737547507, "success_rate.epoch.global": 0.6743240855274784, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9932492253209385, "tokens_p.mean_below_band": 8.307397365570068e-07, "tokens_p.mean_in_band": 0.5261439087759815, "tokens_rate.above_band": 0.8388414407723728, "tokens_rate.below_band": 0.0003713330857779428, "tokens_rate.in_band": 0.16078722614184923 }, { "epoch": 0.5959109042553191, "grad_norm": 103.96724978949956, "learning_rate": 1.9257722584759494e-07, "loss": 0.5468, "step": 3585, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4338235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.44135802469135804, "success_rate.epoch.env.math": 0.8848101265822785, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.7287903667214012, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49025441104006756, "success_rate.epoch.global": 0.674611581920904, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917328042328042, "tokens_p.mean_in_band": 0.7561383928571429, "tokens_rate.above_band": 0.9310344827586207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06896551724137931 }, { "epoch": 0.5967420212765957, "grad_norm": 123.2539519863408, "learning_rate": 1.9255649852563953e-07, "loss": 0.5635, "step": 3590, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4338235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.4406779661016949, "success_rate.epoch.env.math": 0.8852459016393442, "success_rate.epoch.env.sat": 0.1038961038961039, "success_rate.epoch.env.science": 0.7286652078774617, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49017975971575933, "success_rate.epoch.global": 0.6744842179509787, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9985950272479565, "tokens_p.mean_in_band": 0.6500264830508474, "tokens_rate.above_band": 0.9255989911727617, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07440100882723834 }, { "epoch": 0.5975731382978723, "grad_norm": 72.45022684216352, "learning_rate": 1.9253574891897544e-07, "loss": 0.7369, "step": 3595, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.4406779661016949, "success_rate.epoch.env.math": 0.8844221105527639, "success_rate.epoch.env.sat": 0.1038961038961039, "success_rate.epoch.env.science": 0.7289617486338797, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49050752559190797, "success_rate.epoch.global": 0.6747666842754007, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950358072916666, "tokens_p.mean_below_band": 1.7848833522293717e-11, "tokens_p.mean_in_band": 0.7866908482142857, "tokens_rate.above_band": 0.9297820823244553, "tokens_rate.below_band": 0.002421307506053269, "tokens_rate.in_band": 0.06779661016949153 }, { "epoch": 0.598404255319149, "grad_norm": 213.8381891051392, "learning_rate": 1.9251497706442813e-07, "loss": 0.623, "step": 3600, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.44086021505376344, "success_rate.epoch.env.math": 0.8845671267252195, "success_rate.epoch.env.sat": 0.1038961038961039, "success_rate.epoch.env.science": 0.7290586630286494, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49054608736638927, "success_rate.epoch.global": 0.6748725162651662, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9940130624092888, "tokens_p.mean_below_band": 9.74978320300579e-10, "tokens_p.mean_in_band": 0.6457868303571429, "tokens_rate.above_band": 0.9596100278551533, "tokens_rate.below_band": 0.001392757660167131, "tokens_rate.in_band": 0.03899721448467967 }, { "epoch": 0.5992353723404256, "grad_norm": 675.4946975536126, "learning_rate": 1.9249418299886257e-07, "loss": 0.5597, "step": 3605, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.44086021505376344, "success_rate.epoch.env.math": 0.8836045056320401, "success_rate.epoch.env.sat": 0.1038961038961039, "success_rate.epoch.env.science": 0.728956687551076, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49044930585995716, "success_rate.epoch.global": 0.6748024582967516, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9929288321167883, "tokens_p.mean_below_band": 5.995204332975845e-15, "tokens_p.mean_in_band": 0.6825884650735294, "tokens_rate.above_band": 0.8838709677419355, "tokens_rate.below_band": 0.0064516129032258064, "tokens_rate.in_band": 0.10967741935483871 }, { "epoch": 0.6000664893617021, "grad_norm": 119.74339944984386, "learning_rate": 1.9247336675918312e-07, "loss": 0.5385, "step": 3610, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.44086021505376344, "success_rate.epoch.env.math": 0.88375, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.728905824714208, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4904171970869623, "success_rate.epoch.global": 0.6747368421052632, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9932291666666667, "tokens_p.mean_in_band": 0.6618381076388888, "tokens_rate.above_band": 0.8695652173913043, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13043478260869565 }, { "epoch": 0.6008976063829787, "grad_norm": 115.92246519737863, "learning_rate": 1.924525283823335e-07, "loss": 0.6864, "step": 3615, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.44086021505376344, "success_rate.epoch.env.math": 0.8838951310861424, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7292006525285482, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49045719335064253, "success_rate.epoch.global": 0.6750219106047327, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9928482587064676, "tokens_p.mean_in_band": 0.6117788461538461, "tokens_rate.above_band": 0.9392523364485982, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06074766355140187 }, { "epoch": 0.6017287234042553, "grad_norm": 121.2137720974078, "learning_rate": 1.9243166790529677e-07, "loss": 0.577, "step": 3620, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.44086021505376344, "success_rate.epoch.env.math": 0.8840399002493765, "success_rate.epoch.env.sat": 0.10300429184549356, "success_rate.epoch.env.science": 0.7292232482346551, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49043204615543945, "success_rate.epoch.global": 0.675013132551217, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9884588068181818, "tokens_p.mean_in_band": 0.6744342672413793, "tokens_rate.above_band": 0.8585365853658536, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14146341463414633 }, { "epoch": 0.6025598404255319, "grad_norm": 141.7195189438846, "learning_rate": 1.924107853650951e-07, "loss": 0.4776, "step": 3625, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.44086021505376344, "success_rate.epoch.env.math": 0.8841843088418431, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.7293192297260646, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.490413882592029, "success_rate.epoch.global": 0.6750612102133613, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9870065789473684, "tokens_p.mean_in_band": 0.7389547413793104, "tokens_rate.above_band": 0.867579908675799, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1324200913242009 }, { "epoch": 0.6033909574468085, "grad_norm": 63.784874024937196, "learning_rate": 1.923898807987899e-07, "loss": 0.6139, "step": 3630, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.44086021505376344, "success_rate.epoch.env.math": 0.8841843088418431, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.7296346414073072, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49044255638123285, "success_rate.epoch.global": 0.6753405518686693, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925742574257426, "tokens_p.mean_in_band": 0.5678013392857143, "tokens_rate.above_band": 0.9351851851851852, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06481481481481481 }, { "epoch": 0.6042220744680851, "grad_norm": 152.52185517281163, "learning_rate": 1.923689542434817e-07, "loss": 0.7242, "step": 3635, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45588235294117646, "success_rate.epoch.env.logic": 0.4401840490797546, "success_rate.epoch.env.math": 0.8843283582089553, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.7295325587679006, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49038490190429623, "success_rate.epoch.global": 0.6752703174049529, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 1.0001420454545455, "tokens_p.mean_in_band": 0.5144159226190477, "tokens_rate.above_band": 0.9401709401709402, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05982905982905983 }, { "epoch": 0.6050531914893617, "grad_norm": 87.9277490668122, "learning_rate": 1.9234800573630993e-07, "loss": 0.6643, "step": 3640, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.4401840490797546, "success_rate.epoch.env.math": 0.8843283582089553, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.7297516198704104, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4907658767006483, "success_rate.epoch.global": 0.6754966887417219, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951847484276729, "tokens_p.mean_in_band": 0.6398111979166666, "tokens_rate.above_band": 0.9814814814814815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018518518518518517 }, { "epoch": 0.6058843085106383, "grad_norm": 223.88203659517015, "learning_rate": 1.9232703531445314e-07, "loss": 0.6119, "step": 3645, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43795620437956206, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.4401840490797546, "success_rate.epoch.env.math": 0.8843283582089553, "success_rate.epoch.env.sat": 0.10212765957446808, "success_rate.epoch.env.science": 0.7301159342140738, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49075931955101465, "success_rate.epoch.global": 0.6757180156657964, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978387998982965, "tokens_p.mean_below_band": 2.890825271606445e-07, "tokens_p.mean_in_band": 0.5030677356020943, "tokens_rate.above_band": 0.9525308791474933, "tokens_rate.below_band": 0.001210946960523129, "tokens_rate.in_band": 0.04625817389198353 }, { "epoch": 0.6067154255319149, "grad_norm": 92.89720607815259, "learning_rate": 1.9230604301512864e-07, "loss": 0.4645, "step": 3650, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.4418960244648318, "success_rate.epoch.env.math": 0.8843283582089553, "success_rate.epoch.env.sat": 0.10212765957446808, "success_rate.epoch.env.science": 0.7300646551724138, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49128054477566024, "success_rate.epoch.global": 0.6758824552251782, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958784448818898, "tokens_p.mean_in_band": 0.7098858173076923, "tokens_rate.above_band": 0.9287020109689214, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0712979890310786 }, { "epoch": 0.6075465425531915, "grad_norm": 106.10799138425385, "learning_rate": 1.9228502887559265e-07, "loss": 0.6484, "step": 3655, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.44274809160305345, "success_rate.epoch.env.math": 0.8847583643122676, "success_rate.epoch.env.sat": 0.1016949152542373, "success_rate.epoch.env.science": 0.7300134589502019, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49135310229375945, "success_rate.epoch.global": 0.6759854141343984, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9937688821752266, "tokens_p.mean_in_band": 0.6847098214285714, "tokens_rate.above_band": 0.8552971576227391, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14470284237726097 }, { "epoch": 0.6083776595744681, "grad_norm": 122.77511212718184, "learning_rate": 1.9226399293314007e-07, "loss": 0.7303, "step": 3660, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.44274809160305345, "success_rate.epoch.env.math": 0.8851851851851852, "success_rate.epoch.env.sat": 0.1016949152542373, "success_rate.epoch.env.science": 0.7302313071543841, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4914117085734958, "success_rate.epoch.global": 0.6763226366001734, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9999175461741425, "tokens_p.mean_in_band": 0.498046875, "tokens_rate.above_band": 0.9467110741049126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05328892589508743 }, { "epoch": 0.6092087765957447, "grad_norm": 143.2044258195269, "learning_rate": 1.9224293522510458e-07, "loss": 0.6125, "step": 3665, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.4420731707317073, "success_rate.epoch.env.math": 0.8851851851851852, "success_rate.epoch.env.sat": 0.1016949152542373, "success_rate.epoch.env.science": 0.730593607305936, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49138328850805996, "success_rate.epoch.global": 0.676485877664183, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969251093294461, "tokens_p.mean_in_band": 0.47508138020833335, "tokens_rate.above_band": 0.8511166253101737, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1488833746898263 }, { "epoch": 0.6100398936170213, "grad_norm": 164.88449834660247, "learning_rate": 1.9222185578885845e-07, "loss": 0.5198, "step": 3670, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.441400304414003, "success_rate.epoch.env.math": 0.8851851851851852, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.730738255033557, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4912575796253448, "success_rate.epoch.global": 0.6762465373961218, "success_rate.window.env.logic": 0.25, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4166666666666667, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9983226102941176, "tokens_p.mean_in_band": 0.6216517857142857, "tokens_rate.above_band": 0.951048951048951, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04895104895104895 }, { "epoch": 0.6108710106382979, "grad_norm": 66.91480454845104, "learning_rate": 1.9220075466181254e-07, "loss": 0.4978, "step": 3675, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.441400304414003, "success_rate.epoch.env.math": 0.8851851851851852, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.7311712677566337, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49129694441835176, "success_rate.epoch.global": 0.676582497405742, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9906639004149378, "tokens_p.mean_in_band": 0.70546875, "tokens_rate.above_band": 0.9796747967479674, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02032520325203252 }, { "epoch": 0.6117021276595744, "grad_norm": 99.20867828128162, "learning_rate": 1.9217963188141613e-07, "loss": 0.5511, "step": 3680, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.441400304414003, "success_rate.epoch.env.math": 0.8853267570900123, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.7312633832976445, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49131818873160976, "success_rate.epoch.global": 0.676744989633725, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9877300613496932, "tokens_p.mean_in_band": 0.5225694444444444, "tokens_rate.above_band": 0.8190954773869347, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18090452261306533 }, { "epoch": 0.612533244680851, "grad_norm": 163.61691224293878, "learning_rate": 1.92158487485157e-07, "loss": 0.642, "step": 3685, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.44072948328267475, "success_rate.epoch.env.math": 0.8854679802955665, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.7310879443998931, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49125409447492563, "success_rate.epoch.global": 0.6766177739430543, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0001052188552189, "tokens_p.mean_in_band": 0.49451622596153844, "tokens_rate.above_band": 0.9580645161290322, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041935483870967745 }, { "epoch": 0.6133643617021277, "grad_norm": 173.99193286802117, "learning_rate": 1.921373215105613e-07, "loss": 0.4622, "step": 3690, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.44072948328267475, "success_rate.epoch.env.math": 0.8854679802955665, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.7315185481718708, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49129324027237814, "success_rate.epoch.global": 0.6769522496121358, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9890202702702703, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.961038961038961, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03896103896103896 }, { "epoch": 0.6141954787234043, "grad_norm": 107.5133753495482, "learning_rate": 1.921161339951934e-07, "loss": 0.5603, "step": 3695, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.4409090909090909, "success_rate.epoch.env.math": 0.8856088560885609, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.7316617764737263, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4913353958834023, "success_rate.epoch.global": 0.6770582156389942, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9947321428571428, "tokens_p.mean_in_band": 0.6891927083333333, "tokens_rate.above_band": 0.958904109589041, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0410958904109589 }, { "epoch": 0.6150265957446809, "grad_norm": 95.2283550244856, "learning_rate": 1.9209492497665603e-07, "loss": 0.6047, "step": 3700, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.4409090909090909, "success_rate.epoch.env.math": 0.8857493857493858, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.7314148681055156, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49132572509182176, "success_rate.epoch.global": 0.6769919118912407, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9940900259067358, "tokens_p.mean_in_band": 0.6203835227272727, "tokens_rate.above_band": 0.9212410501193318, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07875894988066826 }, { "epoch": 0.6158577127659575, "grad_norm": 75.2000304208048, "learning_rate": 1.920736944925899e-07, "loss": 0.5378, "step": 3705, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45985401459854014, "success_rate.epoch.env.logic": 0.4409090909090909, "success_rate.epoch.env.math": 0.8857493857493858, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.7317008251264306, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4913517211846323, "success_rate.epoch.global": 0.6772141014617369, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9958912768647282, "tokens_p.mean_below_band": 2.2118911147117615e-08, "tokens_p.mean_in_band": 0.5446671195652174, "tokens_rate.above_band": 0.9705521472392638, "tokens_rate.below_band": 0.001226993865030675, "tokens_rate.in_band": 0.02822085889570552 }, { "epoch": 0.616688829787234, "grad_norm": 162.23841033236585, "learning_rate": 1.9205244258067398e-07, "loss": 0.7912, "step": 3710, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.4402420574886536, "success_rate.epoch.env.math": 0.8858895705521472, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.7317202871576708, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4916230656086722, "success_rate.epoch.global": 0.6771423664777606, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9940029373368147, "tokens_p.mean_in_band": 0.5142435213414634, "tokens_rate.above_band": 0.9033018867924528, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09669811320754718 }, { "epoch": 0.6175199468085106, "grad_norm": 114.93962276721412, "learning_rate": 1.920311692786252e-07, "loss": 0.5944, "step": 3715, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.4395770392749245, "success_rate.epoch.env.math": 0.8858895705521472, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.7320053120849933, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4915885207644534, "success_rate.epoch.global": 0.6772477693891558, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9943773234200743, "tokens_p.mean_in_band": 0.6384055397727273, "tokens_rate.above_band": 0.9385903698534543, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061409630146545706 }, { "epoch": 0.6183510638297872, "grad_norm": 90.1475398015592, "learning_rate": 1.9200987462419843e-07, "loss": 0.6889, "step": 3720, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.43891402714932126, "success_rate.epoch.env.math": 0.8860294117647058, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.7322186836518046, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4915603571875232, "success_rate.epoch.global": 0.67735299159952, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.999224748966332, "tokens_p.mean_in_band": 0.5221047794117647, "tokens_rate.above_band": 0.952193475815523, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047806524184476944 }, { "epoch": 0.6191821808510638, "grad_norm": 73.21077156621745, "learning_rate": 1.9198855865518652e-07, "loss": 0.509, "step": 3725, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.4397590361445783, "success_rate.epoch.env.math": 0.8863080684596577, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.7325026511134677, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4916883238376934, "success_rate.epoch.global": 0.6777397260273973, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948248407643312, "tokens_p.mean_below_band": 5.3085386753082275e-08, "tokens_p.mean_in_band": 0.490234375, "tokens_rate.above_band": 0.9874213836477987, "tokens_rate.below_band": 0.006289308176100629, "tokens_rate.in_band": 0.006289308176100629 }, { "epoch": 0.6200132978723404, "grad_norm": 161.44616468919048, "learning_rate": 1.9196722140942007e-07, "loss": 0.4854, "step": 3730, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.4397590361445783, "success_rate.epoch.env.math": 0.8865853658536585, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.7328567646280116, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4917457248293793, "success_rate.epoch.global": 0.6781255344621173, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9880845323741008, "tokens_p.mean_in_band": 0.7252604166666666, "tokens_rate.above_band": 0.86875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13125 }, { "epoch": 0.620844414893617, "grad_norm": 156.63237383293176, "learning_rate": 1.9194586292476743e-07, "loss": 0.5393, "step": 3735, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.4397590361445783, "success_rate.epoch.env.math": 0.8867235079171741, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.7325396825396825, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49172945755439623, "success_rate.epoch.global": 0.6780037600410186, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9927995391705069, "tokens_p.mean_below_band": 8.585629984736443e-10, "tokens_p.mean_in_band": 0.7116477272727273, "tokens_rate.above_band": 0.9041666666666667, "tokens_rate.below_band": 0.004166666666666667, "tokens_rate.in_band": 0.09166666666666666 }, { "epoch": 0.6216755319148937, "grad_norm": 187.7736773086645, "learning_rate": 1.9192448323913468e-07, "loss": 0.4984, "step": 3740, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.4397590361445783, "success_rate.epoch.env.math": 0.8867235079171741, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.732892998678996, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4917615772034247, "success_rate.epoch.global": 0.6782786885245902, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.98125, "tokens_p.mean_in_band": 0.7797475961538461, "tokens_rate.above_band": 0.9022556390977443, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09774436090225563 }, { "epoch": 0.6225066489361702, "grad_norm": 103.11421192987977, "learning_rate": 1.9190308239046557e-07, "loss": 0.5473, "step": 3745, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4420289855072464, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.4397590361445783, "success_rate.epoch.env.math": 0.8871359223300971, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.7327700026406126, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49178788796474643, "success_rate.epoch.global": 0.6783825285787408, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9946120689655172, "tokens_p.mean_in_band": 0.7087053571428571, "tokens_rate.above_band": 0.9193083573487032, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08069164265129683 }, { "epoch": 0.6233377659574468, "grad_norm": 159.40804030395597, "learning_rate": 1.9188166041674136e-07, "loss": 0.6599, "step": 3750, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.43909774436090226, "success_rate.epoch.env.math": 0.8871359223300971, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7330519651806912, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4914262694257123, "success_rate.epoch.global": 0.678254942058623, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.36, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9982612055641422, "tokens_p.mean_in_band": 0.5913514254385965, "tokens_rate.above_band": 0.9578090303478904, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04219096965210955 }, { "epoch": 0.6241688829787234, "grad_norm": 15644.068519740118, "learning_rate": 1.9186021735598078e-07, "loss": 0.6034, "step": 3755, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.43909774436090226, "success_rate.epoch.env.math": 0.8871359223300971, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7332104292862787, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.491440675253493, "success_rate.epoch.global": 0.6784133469526727, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9862372819767442, "tokens_p.mean_in_band": 0.5831383689839572, "tokens_rate.above_band": 0.7862857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21371428571428572 }, { "epoch": 0.625, "grad_norm": 129.76246222390404, "learning_rate": 1.9183875324624e-07, "loss": 0.7045, "step": 3760, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.43993993993993996, "success_rate.epoch.env.math": 0.8872727272727273, "success_rate.epoch.env.sat": 0.0995850622406639, "success_rate.epoch.env.science": 0.7335612835349816, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49152384952722344, "success_rate.epoch.global": 0.6786807208432506, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9924879807692307, "tokens_p.mean_in_band": 0.6262122844827587, "tokens_rate.above_band": 0.8776371308016878, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12236286919831224 }, { "epoch": 0.6258311170212766, "grad_norm": 64.69679973892326, "learning_rate": 1.9181726812561257e-07, "loss": 0.7835, "step": 3765, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.43993993993993996, "success_rate.epoch.env.math": 0.8872727272727273, "success_rate.epoch.env.sat": 0.09917355371900827, "success_rate.epoch.env.science": 0.733841303205465, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49151189599529865, "success_rate.epoch.global": 0.6787837608289451, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.989453125, "tokens_p.mean_in_band": 0.5915364583333333, "tokens_rate.above_band": 0.8421052631578947, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15789473684210525 }, { "epoch": 0.6266622340425532, "grad_norm": 138.6439542434078, "learning_rate": 1.917957620322293e-07, "loss": 0.6185, "step": 3770, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.43993993993993996, "success_rate.epoch.env.math": 0.8875453446191052, "success_rate.epoch.env.sat": 0.102880658436214, "success_rate.epoch.env.science": 0.7340509320031504, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49189274607359584, "success_rate.epoch.global": 0.6791108094349227, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9896496815286624, "tokens_p.mean_in_band": 0.6415441176470589, "tokens_rate.above_band": 0.9022988505747126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09770114942528736 }, { "epoch": 0.6274933510638298, "grad_norm": 101.2355630812104, "learning_rate": 1.9177423500425829e-07, "loss": 0.4772, "step": 3775, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.463768115942029, "success_rate.epoch.env.logic": 0.43993993993993996, "success_rate.epoch.env.math": 0.8875453446191052, "success_rate.epoch.env.sat": 0.102880658436214, "success_rate.epoch.env.science": 0.7343995804929209, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4919244413908477, "success_rate.epoch.global": 0.6793828416412343, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9876302083333334, "tokens_p.mean_in_band": 0.8818359375, "tokens_rate.above_band": 0.972972972972973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02702702702702703 }, { "epoch": 0.6283244680851063, "grad_norm": 65.2223923537964, "learning_rate": 1.9175268707990468e-07, "loss": 0.5602, "step": 3780, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.43993993993993996, "success_rate.epoch.env.math": 0.8876811594202898, "success_rate.epoch.env.sat": 0.102880658436214, "success_rate.epoch.env.science": 0.7345549738219895, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49164760019670983, "success_rate.epoch.global": 0.6794784964442939, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9937316536203522, "tokens_p.mean_below_band": 1.2514647096395493e-09, "tokens_p.mean_in_band": 0.7114534198113207, "tokens_rate.above_band": 0.949814126394052, "tokens_rate.below_band": 0.0009293680297397769, "tokens_rate.in_band": 0.04925650557620818 }, { "epoch": 0.629155585106383, "grad_norm": 108.4156628420998, "learning_rate": 1.917311182974108e-07, "loss": 0.5019, "step": 3785, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.43993993993993996, "success_rate.epoch.env.math": 0.8880866425992779, "success_rate.epoch.env.sat": 0.102880658436214, "success_rate.epoch.env.science": 0.7347632749149883, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49170339876689045, "success_rate.epoch.global": 0.6798037889039242, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9850206611570248, "tokens_p.mean_in_band": 0.76953125, "tokens_rate.above_band": 0.9029850746268657, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09701492537313433 }, { "epoch": 0.6299867021276596, "grad_norm": 90.50173293620142, "learning_rate": 1.917095286950559e-07, "loss": 0.4909, "step": 3790, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.43928035982008995, "success_rate.epoch.env.math": 0.8880866425992779, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.7351097178683386, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49163660065695675, "success_rate.epoch.global": 0.6798445683392464, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9951644974692697, "tokens_p.mean_in_band": 0.7150974025974026, "tokens_rate.above_band": 0.9472602739726027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05273972602739726 }, { "epoch": 0.6308178191489362, "grad_norm": 80.86148737903343, "learning_rate": 1.9168791831115632e-07, "loss": 0.6606, "step": 3795, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.43884892086330934, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.43928035982008995, "success_rate.epoch.env.math": 0.8882211538461539, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.735317149569303, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49166768637948777, "success_rate.epoch.global": 0.68006078001013, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9923406862745098, "tokens_p.mean_in_band": 0.5731336805555556, "tokens_rate.above_band": 0.85, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15 }, { "epoch": 0.6316489361702128, "grad_norm": 88.64463313960053, "learning_rate": 1.9166628718406513e-07, "loss": 0.6239, "step": 3800, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4357142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.43862275449101795, "success_rate.epoch.env.math": 0.8883553421368547, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.735263432446531, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49133025280583625, "success_rate.epoch.global": 0.6798785629954461, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9982160433070866, "tokens_p.mean_below_band": 8.754432201385498e-08, "tokens_p.mean_in_band": 0.516812193627451, "tokens_rate.above_band": 0.9670050761421319, "tokens_rate.below_band": 0.0006345177664974619, "tokens_rate.in_band": 0.03236040609137056 }, { "epoch": 0.6324800531914894, "grad_norm": 161.27649080880607, "learning_rate": 1.9164463535217228e-07, "loss": 0.6293, "step": 3805, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4357142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.43946188340807174, "success_rate.epoch.env.math": 0.888622754491018, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.735539343408025, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4914559302815373, "success_rate.epoch.global": 0.680256064690027, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9848214285714286, "tokens_p.mean_below_band": 4.731118679046631e-07, "tokens_p.mean_in_band": 0.1667052801724138, "tokens_rate.above_band": 0.30230326295585414, "tokens_rate.below_band": 0.0019193857965451055, "tokens_rate.in_band": 0.6957773512476008 }, { "epoch": 0.633311170212766, "grad_norm": 141.82728281625657, "learning_rate": 1.9162296285390452e-07, "loss": 0.4345, "step": 3810, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4357142857142857, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.44029850746268656, "success_rate.epoch.env.math": 0.8888888888888888, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.7358146798542425, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.4919315806913698, "success_rate.epoch.global": 0.6806864064602961, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9961062300319489, "tokens_p.mean_in_band": 0.8236177884615384, "tokens_rate.above_band": 0.9796557120500783, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02034428794992175 }, { "epoch": 0.6341422872340425, "grad_norm": 44.44347192656521, "learning_rate": 1.9160126972772515e-07, "loss": 0.3665, "step": 3815, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4326241134751773, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.44029850746268656, "success_rate.epoch.env.math": 0.8888888888888888, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.7357607282184655, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49164575124819837, "success_rate.epoch.global": 0.6805648957632818, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9934731012658228, "tokens_p.mean_in_band": 0.7404296875, "tokens_rate.above_band": 0.9693251533742331, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03067484662576687 }, { "epoch": 0.6349734042553191, "grad_norm": 101.34778655927114, "learning_rate": 1.9157955601213423e-07, "loss": 0.3338, "step": 3820, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4326241134751773, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.4396423248882265, "success_rate.epoch.env.math": 0.8890214797136038, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.7361038961038961, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49162934907871525, "success_rate.epoch.global": 0.6807724601175483, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990834497206704, "tokens_p.mean_in_band": 0.6033528645833334, "tokens_rate.above_band": 0.9675675675675676, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032432432432432434 }, { "epoch": 0.6358045212765957, "grad_norm": 96.80033436536117, "learning_rate": 1.9155782174566824e-07, "loss": 0.5536, "step": 3825, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4295774647887324, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.4396423248882265, "success_rate.epoch.env.math": 0.8890214797136038, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.7359273670557718, "success_rate.epoch.env.webshop": 0.2857142857142857, "success_rate.epoch.env_macro_mean": 0.49133633292102713, "success_rate.epoch.global": 0.6805905049488341, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.17857142857142858, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9995021446078431, "tokens_p.mean_in_band": 0.627197265625, "tokens_rate.above_band": 0.9532710280373832, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04672897196261682 }, { "epoch": 0.6366356382978723, "grad_norm": 91.36916820260075, "learning_rate": 1.9153606696690016e-07, "loss": 0.6951, "step": 3830, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.4396423248882265, "success_rate.epoch.env.math": 0.8890214797136038, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.736132711249352, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48783515294502605, "success_rate.epoch.global": 0.6805229634596044, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9973214285714286, "tokens_p.mean_in_band": 0.4593385686528497, "tokens_rate.above_band": 0.8886324293133295, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11136757068667051 }, { "epoch": 0.637466755319149, "grad_norm": 71.62693560785094, "learning_rate": 1.9151429171443947e-07, "loss": 0.4961, "step": 3835, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.44047619047619047, "success_rate.epoch.env.math": 0.8891537544696066, "success_rate.epoch.env.sat": 0.10204081632653061, "success_rate.epoch.env.science": 0.7362694300518134, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48759804863550066, "success_rate.epoch.global": 0.680509042196919, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9948992286638467, "tokens_p.mean_below_band": 1.4528632164001465e-07, "tokens_p.mean_in_band": 0.5574793198529412, "tokens_rate.above_band": 0.9215776198119697, "tokens_rate.below_band": 0.00045861041045631735, "tokens_rate.in_band": 0.07796376977757395 }, { "epoch": 0.6382978723404256, "grad_norm": 112.22480690159027, "learning_rate": 1.9149249602693181e-07, "loss": 0.5474, "step": 3840, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.44047619047619047, "success_rate.epoch.env.math": 0.8894173602853745, "success_rate.epoch.env.sat": 0.1016260162601626, "success_rate.epoch.env.science": 0.7364060072501295, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48759671981256575, "success_rate.epoch.global": 0.6806090011711561, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9932397959183673, "tokens_p.mean_in_band": 0.5771484375, "tokens_rate.above_band": 0.8477508650519031, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1522491349480969 }, { "epoch": 0.6391289893617021, "grad_norm": 226.38365982216172, "learning_rate": 1.914706799430593e-07, "loss": 0.5598, "step": 3845, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.44047619047619047, "success_rate.epoch.env.math": 0.8894173602853745, "success_rate.epoch.env.sat": 0.1016260162601626, "success_rate.epoch.env.science": 0.7368829154820367, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4876400751063755, "success_rate.epoch.global": 0.6809826203208557, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9802389705882353, "tokens_p.mean_in_band": 0.8001302083333334, "tokens_rate.above_band": 0.9006622516556292, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09933774834437085 }, { "epoch": 0.6399601063829787, "grad_norm": 120.31993467942212, "learning_rate": 1.9144884350154004e-07, "loss": 0.803, "step": 3850, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.4406528189910979, "success_rate.epoch.env.math": 0.8895486935866983, "success_rate.epoch.env.sat": 0.10121457489878542, "success_rate.epoch.env.science": 0.7370188581761818, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48764302630173906, "success_rate.epoch.global": 0.6809682804674457, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983684738955824, "tokens_p.mean_in_band": 0.6465320121951219, "tokens_rate.above_band": 0.9479695431472082, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05203045685279188 }, { "epoch": 0.6407912234042553, "grad_norm": 101.31314982561518, "learning_rate": 1.914269867411284e-07, "loss": 0.5003, "step": 3855, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.4414814814814815, "success_rate.epoch.env.math": 0.8898104265402843, "success_rate.epoch.env.sat": 0.10080645161290322, "success_rate.epoch.env.science": 0.7372903225806452, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4877297296256073, "success_rate.epoch.global": 0.6812270756918973, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9858732876712328, "tokens_p.mean_in_band": 0.6571969696969697, "tokens_rate.above_band": 0.8690476190476191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13095238095238096 }, { "epoch": 0.6416223404255319, "grad_norm": 106.63928488169368, "learning_rate": 1.9140510970061482e-07, "loss": 0.6351, "step": 3860, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.4414814814814815, "success_rate.epoch.env.math": 0.8900709219858156, "success_rate.epoch.env.sat": 0.10080645161290322, "success_rate.epoch.env.science": 0.7374935533780299, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48777188655678144, "success_rate.epoch.global": 0.6814925870398134, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.986359126984127, "tokens_p.mean_in_band": 0.765625, "tokens_rate.above_band": 0.8936170212765957, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10638297872340426 }, { "epoch": 0.6424534574468085, "grad_norm": 96.55301546646024, "learning_rate": 1.913832124188256e-07, "loss": 0.7434, "step": 3865, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.4408284023668639, "success_rate.epoch.env.math": 0.8902007083825265, "success_rate.epoch.env.sat": 0.10080645161290322, "success_rate.epoch.env.science": 0.7378315735256246, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48775504359584404, "success_rate.epoch.global": 0.6816971713810316, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952064896755162, "tokens_p.mean_below_band": 6.230038707144558e-11, "tokens_p.mean_in_band": 0.5353954081632653, "tokens_rate.above_band": 0.9313186813186813, "tokens_rate.below_band": 0.0013736263736263737, "tokens_rate.in_band": 0.0673076923076923 }, { "epoch": 0.6432845744680851, "grad_norm": 144.2228275344213, "learning_rate": 1.9136129493462309e-07, "loss": 0.6964, "step": 3870, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4647887323943662, "success_rate.epoch.env.logic": 0.4408284023668639, "success_rate.epoch.env.math": 0.8902007083825265, "success_rate.epoch.env.sat": 0.10040160642570281, "success_rate.epoch.env.science": 0.73823605039856, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4881000850757243, "success_rate.epoch.global": 0.6819541375872383, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9892592592592593, "tokens_p.mean_in_band": 0.7662464488636364, "tokens_rate.above_band": 0.8846657929226737, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11533420707732635 }, { "epoch": 0.6441156914893617, "grad_norm": 65.89080473833339, "learning_rate": 1.913393572869055e-07, "loss": 0.5515, "step": 3875, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4647887323943662, "success_rate.epoch.env.logic": 0.4410029498525074, "success_rate.epoch.env.math": 0.8903301886792453, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7379912663755459, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4880689611969648, "success_rate.epoch.global": 0.6817125788250913, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9902912621359223, "tokens_p.mean_below_band": 1.6079866327345371e-09, "tokens_p.mean_in_band": 0.72109375, "tokens_rate.above_band": 0.8704225352112676, "tokens_rate.below_band": 0.0028169014084507044, "tokens_rate.in_band": 0.1267605633802817 }, { "epoch": 0.6449468085106383, "grad_norm": 145.92265464986508, "learning_rate": 1.9131739951460672e-07, "loss": 0.4791, "step": 3880, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4647887323943662, "success_rate.epoch.env.logic": 0.4410029498525074, "success_rate.epoch.env.math": 0.8903301886792453, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7380035925070567, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4880700817543748, "success_rate.epoch.global": 0.6817578772802654, "success_rate.window.env.science": 0.9, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9855769230769231, "tokens_p.mean_in_band": 0.69375, "tokens_rate.above_band": 0.9122807017543859, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08771929824561403 }, { "epoch": 0.6457779255319149, "grad_norm": 68.93279303730233, "learning_rate": 1.9129542165669648e-07, "loss": 0.5836, "step": 3885, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4647887323943662, "success_rate.epoch.env.logic": 0.4410029498525074, "success_rate.epoch.env.math": 0.8905882352941177, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7382051282051282, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4881118619646424, "success_rate.epoch.global": 0.6820215410107705, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994921875, "tokens_p.mean_in_band": 0.8056640625, "tokens_rate.above_band": 0.975609756097561, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024390243902439025 }, { "epoch": 0.6466090425531915, "grad_norm": 85.7485280071227, "learning_rate": 1.9127342375218e-07, "loss": 0.4951, "step": 3890, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42657342657342656, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.44035346097201766, "success_rate.epoch.env.math": 0.8909730363423212, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7384733606557377, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48845243294240837, "success_rate.epoch.global": 0.6823295830575777, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997413217623498, "tokens_p.mean_in_band": 0.7876674107142857, "tokens_rate.above_band": 0.981651376146789, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01834862385321101 }, { "epoch": 0.6474401595744681, "grad_norm": 58.550751599675415, "learning_rate": 1.9125140584009825e-07, "loss": 0.5233, "step": 3895, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4236111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.44035346097201766, "success_rate.epoch.env.math": 0.8912280701754386, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.7388078792530058, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4882005084509146, "success_rate.epoch.global": 0.682471501734677, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9989583333333333, "tokens_p.mean_in_band": 0.6354325457317073, "tokens_rate.above_band": 0.9500609013398295, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049939098660170524 }, { "epoch": 0.6482712765957447, "grad_norm": 108.31204408618932, "learning_rate": 1.912293679595276e-07, "loss": 0.5418, "step": 3900, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4236111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.44035346097201766, "success_rate.epoch.env.math": 0.8912280701754386, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.7390081799591002, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4882187176060141, "success_rate.epoch.global": 0.6826287978863936, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990649606299212, "tokens_p.mean_in_band": 0.6065027573529411, "tokens_rate.above_band": 0.9491778774289985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05082212257100149 }, { "epoch": 0.6491023936170213, "grad_norm": 87.44474154576085, "learning_rate": 1.912073101495799e-07, "loss": 0.6512, "step": 3905, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4236111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.44035346097201766, "success_rate.epoch.env.math": 0.8913551401869159, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.7394078611536499, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4882666040792893, "success_rate.epoch.global": 0.6829952168893287, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9930862831858407, "tokens_p.mean_in_band": 0.677734375, "tokens_rate.above_band": 0.9890590809628009, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010940919037199124 }, { "epoch": 0.6499335106382979, "grad_norm": 74.53925459060798, "learning_rate": 1.9118523244940233e-07, "loss": 0.4681, "step": 3910, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4236111111111111, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.44035346097201766, "success_rate.epoch.env.math": 0.8913551401869159, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.7393630573248408, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48826253100394296, "success_rate.epoch.global": 0.6830313014827019, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.7380952380952381, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982358870967742, "tokens_p.mean_in_band": 0.6105385638297872, "tokens_rate.above_band": 0.9405815423514539, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05941845764854614 }, { "epoch": 0.6507646276595744, "grad_norm": 182.68350725199667, "learning_rate": 1.9116313489817744e-07, "loss": 0.5028, "step": 3915, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.42758620689655175, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.44035346097201766, "success_rate.epoch.env.math": 0.8914819136522754, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.7396284041740901, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48865955064940203, "success_rate.epoch.global": 0.6833443054641212, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949152542372881, "tokens_p.mean_in_band": 0.8297991071428571, "tokens_rate.above_band": 0.9768211920529801, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023178807947019868 }, { "epoch": 0.651595744680851, "grad_norm": 109.99655061927714, "learning_rate": 1.9114101753512292e-07, "loss": 0.6521, "step": 3920, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4315068493150685, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4652777777777778, "success_rate.epoch.env.logic": 0.44035346097201766, "success_rate.epoch.env.math": 0.8914819136522754, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.7395727365208545, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.488715121923183, "success_rate.epoch.global": 0.6832757770103601, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4285714285714286, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9901678356713427, "tokens_p.mean_in_band": 0.721448131443299, "tokens_rate.above_band": 0.9114155251141552, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08858447488584476 }, { "epoch": 0.6524268617021277, "grad_norm": 293.44204816833684, "learning_rate": 1.9111888039949173e-07, "loss": 0.4753, "step": 3925, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4315068493150685, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4652777777777778, "success_rate.epoch.env.logic": 0.44035346097201766, "success_rate.epoch.env.math": 0.8916083916083916, "success_rate.epoch.env.sat": 0.0992063492063492, "success_rate.epoch.env.science": 0.7397712833545108, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4887087383205124, "success_rate.epoch.global": 0.683371672691423, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9880401234567902, "tokens_p.mean_in_band": 0.6696134868421053, "tokens_rate.above_band": 0.8950276243093923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10497237569060773 }, { "epoch": 0.6532579787234043, "grad_norm": 386.87634800785116, "learning_rate": 1.910967235305718e-07, "loss": 0.6982, "step": 3930, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4315068493150685, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4689655172413793, "success_rate.epoch.env.logic": 0.43970588235294117, "success_rate.epoch.env.math": 0.8916083916083916, "success_rate.epoch.env.sat": 0.0992063492063492, "success_rate.epoch.env.science": 0.7399034798069596, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4889971344384192, "success_rate.epoch.global": 0.683415435139573, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.575, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9969915305873751, "tokens_p.mean_below_band": 8.866190910339355e-07, "tokens_p.mean_in_band": 0.5332154605263157, "tokens_rate.above_band": 0.945173923059203, "tokens_rate.below_band": 0.0001151808339092375, "tokens_rate.in_band": 0.05471089610688781 }, { "epoch": 0.6540890957446809, "grad_norm": 249.61237126243276, "learning_rate": 1.9107454696768623e-07, "loss": 0.5429, "step": 3935, "success_rate.epoch.env.abd": 0.43902439024390244, "success_rate.epoch.env.agentgym:alfworld": 0.4315068493150685, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4689655172413793, "success_rate.epoch.env.logic": 0.43841642228739003, "success_rate.epoch.env.math": 0.8916083916083916, "success_rate.epoch.env.sat": 0.0992063492063492, "success_rate.epoch.env.science": 0.7399137274803349, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48888084240276675, "success_rate.epoch.global": 0.6832349081364829, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976532567049808, "tokens_p.mean_in_band": 0.6016090029761905, "tokens_rate.above_band": 0.9688195991091314, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031180400890868598 }, { "epoch": 0.6549202127659575, "grad_norm": 139.10537974662594, "learning_rate": 1.9105235075019292e-07, "loss": 0.4462, "step": 3940, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4315068493150685, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4689655172413793, "success_rate.epoch.env.logic": 0.43841642228739003, "success_rate.epoch.env.math": 0.8916083916083916, "success_rate.epoch.env.sat": 0.10276679841897234, "success_rate.epoch.env.science": 0.7401115618661258, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.488272235305125, "success_rate.epoch.global": 0.683330601540731, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9884143286573146, "tokens_p.mean_in_band": 0.7771080280172413, "tokens_rate.above_band": 0.895870736086176, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10412926391382406 }, { "epoch": 0.655751329787234, "grad_norm": 640.7779559156587, "learning_rate": 1.9103013491748472e-07, "loss": 0.5635, "step": 3945, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4657534246575342, "success_rate.epoch.env.logic": 0.43841642228739003, "success_rate.epoch.env.math": 0.8916083916083916, "success_rate.epoch.env.sat": 0.10276679841897234, "success_rate.epoch.env.science": 0.7399898631525595, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4883207361276144, "success_rate.epoch.global": 0.6832104832104832, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9880862329803328, "tokens_p.mean_below_band": 3.413297235965729e-07, "tokens_p.mean_in_band": 0.6906184348739496, "tokens_rate.above_band": 0.8452685421994884, "tokens_rate.below_band": 0.0025575447570332483, "tokens_rate.in_band": 0.15217391304347827 }, { "epoch": 0.6565824468085106, "grad_norm": 171.08655331607352, "learning_rate": 1.910078995089893e-07, "loss": 0.5363, "step": 3950, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4657534246575342, "success_rate.epoch.env.logic": 0.43841642228739003, "success_rate.epoch.env.math": 0.8917345750873108, "success_rate.epoch.env.sat": 0.10196078431372549, "success_rate.epoch.env.science": 0.7400557385355967, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48826492201458804, "success_rate.epoch.global": 0.6830905221803896, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9956268221574344, "tokens_p.mean_in_band": 0.6408991228070176, "tokens_rate.above_band": 0.8575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1425 }, { "epoch": 0.6574135638297872, "grad_norm": 94.74577963310192, "learning_rate": 1.9098564456416907e-07, "loss": 0.593, "step": 3955, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.43841642228739003, "success_rate.epoch.env.math": 0.891860465116279, "success_rate.epoch.env.sat": 0.10116731517509728, "success_rate.epoch.env.science": 0.7401215805471124, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48421160798979845, "success_rate.epoch.global": 0.6829108748977923, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9968823070927514, "tokens_p.mean_below_band": 1.418811734765768e-09, "tokens_p.mean_in_band": 0.6426886792452831, "tokens_rate.above_band": 0.9596110695587136, "tokens_rate.below_band": 0.0007479431563201197, "tokens_rate.in_band": 0.039640987284966345 }, { "epoch": 0.6582446808510638, "grad_norm": 102.38102897865964, "learning_rate": 1.9096337012252107e-07, "loss": 0.4952, "step": 3960, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.43841642228739003, "success_rate.epoch.env.math": 0.8921113689095128, "success_rate.epoch.env.sat": 0.10116731517509728, "success_rate.epoch.env.science": 0.7403846153846154, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4842583296835018, "success_rate.epoch.global": 0.6832216958013396, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7619047619047619, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9856687898089171, "tokens_p.mean_below_band": 3.4051481634378433e-09, "tokens_p.mean_in_band": 0.677734375, "tokens_rate.above_band": 0.9691358024691358, "tokens_rate.below_band": 0.006172839506172839, "tokens_rate.in_band": 0.024691358024691357 }, { "epoch": 0.6590757978723404, "grad_norm": 140.89190384657059, "learning_rate": 1.9094107622357705e-07, "loss": 0.5041, "step": 3965, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.4377745241581259, "success_rate.epoch.env.math": 0.8910776361529548, "success_rate.epoch.env.sat": 0.10116731517509728, "success_rate.epoch.env.science": 0.7405815423514539, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48412390205450323, "success_rate.epoch.global": 0.6831537708129285, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995164641943734, "tokens_p.mean_in_band": 0.6110983455882353, "tokens_rate.above_band": 0.9583333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041666666666666664 }, { "epoch": 0.659906914893617, "grad_norm": 126.38315906716801, "learning_rate": 1.9091876290690317e-07, "loss": 0.591, "step": 3970, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.4371345029239766, "success_rate.epoch.env.math": 0.8912037037037037, "success_rate.epoch.env.sat": 0.10116731517509728, "success_rate.epoch.env.science": 0.74077817079333, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48409505430527383, "success_rate.epoch.global": 0.6832490621432067, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9975, "tokens_p.mean_in_band": 0.5703876201923077, "tokens_rate.above_band": 0.9600614439324117, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039938556067588324 }, { "epoch": 0.6607380319148937, "grad_norm": 123.67867687147924, "learning_rate": 1.9089643021210006e-07, "loss": 0.6887, "step": 3975, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.4371345029239766, "success_rate.epoch.env.math": 0.8912037037037037, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.7410398788490661, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4840478264204319, "success_rate.epoch.global": 0.6832328499266743, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9948694029850746, "tokens_p.mean_in_band": 0.6282848011363636, "tokens_rate.above_band": 0.8204081632653061, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17959183673469387 }, { "epoch": 0.6615691489361702, "grad_norm": 101.74676999248933, "learning_rate": 1.9087407817880287e-07, "loss": 0.5793, "step": 3980, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.43731778425655976, "success_rate.epoch.env.math": 0.8912037037037037, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.7410489157841654, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48406530989931207, "success_rate.epoch.global": 0.6832166693797819, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.7380952380952381, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9931483957219251, "tokens_p.mean_below_band": 1.318767317570746e-10, "tokens_p.mean_in_band": 0.59796142578125, "tokens_rate.above_band": 0.9577464788732394, "tokens_rate.below_band": 0.0012804097311139564, "tokens_rate.in_band": 0.040973111395646605 }, { "epoch": 0.6624002659574468, "grad_norm": 103.0603904471768, "learning_rate": 1.9085170684668094e-07, "loss": 0.5177, "step": 3985, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.43731778425655976, "success_rate.epoch.env.math": 0.8912037037037037, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.7413098236775819, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4840890287987136, "success_rate.epoch.global": 0.6834228078737595, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9912551440329218, "tokens_p.mean_in_band": 0.7557870370370371, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 0.6632313829787234, "grad_norm": 98.10774888228319, "learning_rate": 1.908293162554379e-07, "loss": 0.6219, "step": 3990, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.4366812227074236, "success_rate.epoch.env.math": 0.8913294797687862, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.741570206341218, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4840662649059484, "success_rate.epoch.global": 0.6835689907362262, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992447129909365, "tokens_p.mean_in_band": 0.644287109375, "tokens_rate.above_band": 0.976401179941003, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02359882005899705 }, { "epoch": 0.6640625, "grad_norm": 199.4790131893835, "learning_rate": 1.9080690644481157e-07, "loss": 0.5238, "step": 3995, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.4366812227074236, "success_rate.epoch.env.math": 0.8905529953917051, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.7412622579834046, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.483967680111867, "success_rate.epoch.global": 0.68339016074038, "success_rate.window.env.math": 0.75, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9889520202020202, "tokens_p.mean_in_band": 0.603125, "tokens_rate.above_band": 0.868421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13157894736842105 }, { "epoch": 0.6648936170212766, "grad_norm": 293.33099379260904, "learning_rate": 1.9078447745457387e-07, "loss": 0.3631, "step": 4000, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.4366812227074236, "success_rate.epoch.env.math": 0.8908045977011494, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.7415871421396283, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48402008797238233, "success_rate.epoch.global": 0.6837495945507622, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9904255319148936, "tokens_p.mean_in_band": 0.5379464285714286, "tokens_rate.above_band": 0.91796875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08203125 }, { "epoch": 0.6657247340425532, "grad_norm": 105.69049519167355, "learning_rate": 1.9076202932453077e-07, "loss": 0.4943, "step": 4005, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.4366812227074236, "success_rate.epoch.env.math": 0.8908045977011494, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7419759277833501, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4840203320867115, "success_rate.epoch.global": 0.6839462173983476, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9895833333333334, "tokens_p.mean_in_band": 0.7537802419354839, "tokens_rate.above_band": 0.848780487804878, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15121951219512195 }, { "epoch": 0.6665558510638298, "grad_norm": 132.2764567849794, "learning_rate": 1.9073956209452218e-07, "loss": 0.5579, "step": 4010, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43537414965986393, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.436046511627907, "success_rate.epoch.env.math": 0.8908045977011494, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.7421052631578947, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.483974388840805, "success_rate.epoch.global": 0.6839378238341969, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.20833333333333334, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.986351569713758, "tokens_p.mean_in_band": 0.5285650589330024, "tokens_rate.above_band": 0.7288021534320323, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2711978465679677 }, { "epoch": 0.6673869680851063, "grad_norm": 111.09571279472186, "learning_rate": 1.9071707580442196e-07, "loss": 0.6301, "step": 4015, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.43623188405797103, "success_rate.epoch.env.math": 0.8908045977011494, "success_rate.epoch.env.sat": 0.09961685823754789, "success_rate.epoch.env.science": 0.7421131697546319, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4843039503675027, "success_rate.epoch.global": 0.6838615782664942, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 1.0000566123188406, "tokens_p.mean_in_band": 0.5877130681818182, "tokens_rate.above_band": 0.9616724738675958, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03832752613240418 }, { "epoch": 0.668218085106383, "grad_norm": 85.2019425538384, "learning_rate": 1.9069457049413774e-07, "loss": 0.5406, "step": 4020, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.4370477568740955, "success_rate.epoch.env.math": 0.8908045977011494, "success_rate.epoch.env.sat": 0.09961685823754789, "success_rate.epoch.env.science": 0.7421855463865966, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48438470031732894, "success_rate.epoch.global": 0.6840064620355412, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0006323433420365, "tokens_p.mean_in_band": 0.6402994791666666, "tokens_rate.above_band": 0.9909443725743855, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009055627425614488 }, { "epoch": 0.6690492021276596, "grad_norm": 188.8013827027474, "learning_rate": 1.9067204620361098e-07, "loss": 0.5383, "step": 4025, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.43641618497109824, "success_rate.epoch.env.math": 0.8909299655568312, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.74225, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48465695726669655, "success_rate.epoch.global": 0.6840490797546013, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7916666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975243506493506, "tokens_p.mean_below_band": 2.868473529815674e-07, "tokens_p.mean_in_band": 0.7132161458333334, "tokens_rate.above_band": 0.9685534591194969, "tokens_rate.below_band": 0.0012578616352201257, "tokens_rate.in_band": 0.03018867924528302 }, { "epoch": 0.6698803191489362, "grad_norm": 102.0986881966108, "learning_rate": 1.906495029728167e-07, "loss": 0.4231, "step": 4030, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.43641618497109824, "success_rate.epoch.env.math": 0.8909299655568312, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7427002745195908, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4846978913139321, "success_rate.epoch.global": 0.6844057410095146, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9860819327731093, "tokens_p.mean_in_band": 0.7477678571428571, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 0.6707114361702128, "grad_norm": 106.92810365587096, "learning_rate": 1.9062694084176369e-07, "loss": 0.6378, "step": 4035, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.43641618497109824, "success_rate.epoch.env.math": 0.8911798396334479, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7428286355699676, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.484732276325477, "success_rate.epoch.global": 0.6846091861402095, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9979983987189752, "tokens_p.mean_in_band": 0.23298816568047337, "tokens_rate.above_band": 0.7870195337114052, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21298046628859482 }, { "epoch": 0.6715425531914894, "grad_norm": 45.515915553187185, "learning_rate": 1.9060435985049412e-07, "loss": 0.5529, "step": 4040, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.4357864357864358, "success_rate.epoch.env.math": 0.8913043478260869, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7430209371884346, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4847038272915173, "success_rate.epoch.global": 0.6847020933977456, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970544918998527, "tokens_p.mean_in_band": 0.5982142857142857, "tokens_rate.above_band": 0.9603960396039604, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039603960396039604 }, { "epoch": 0.672373670212766, "grad_norm": 100.32203937573648, "learning_rate": 1.905817600390838e-07, "loss": 0.6918, "step": 4045, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.4357864357864358, "success_rate.epoch.env.math": 0.8914285714285715, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7429708882806668, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48503649934293996, "success_rate.epoch.global": 0.6848367904807847, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9959615384615385, "tokens_p.mean_below_band": 3.0547380447387695e-07, "tokens_p.mean_in_band": 0.7211371527777778, "tokens_rate.above_band": 0.9447674418604651, "tokens_rate.below_band": 0.0029069767441860465, "tokens_rate.in_band": 0.05232558139534884 }, { "epoch": 0.6732047872340425, "grad_norm": 106.78735977629269, "learning_rate": 1.905591414476418e-07, "loss": 0.5212, "step": 4050, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.4357864357864358, "success_rate.epoch.env.math": 0.8914285714285715, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7430987316587914, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.485048121468224, "success_rate.epoch.global": 0.6849887495981999, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9951105937136205, "tokens_p.mean_in_band": 0.5305278675766284, "tokens_rate.above_band": 0.8916337969690679, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10836620303093211 }, { "epoch": 0.6740359042553191, "grad_norm": 109.54933582165962, "learning_rate": 1.9053650411631063e-07, "loss": 0.4465, "step": 4055, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.4365994236311239, "success_rate.epoch.env.math": 0.8916761687571265, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7428571428571429, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48512257568382355, "success_rate.epoch.global": 0.6850216728206775, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9876237623762376, "tokens_p.mean_in_band": 0.638671875, "tokens_rate.above_band": 0.926605504587156, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07339449541284404 }, { "epoch": 0.6748670212765957, "grad_norm": 120.08273657092438, "learning_rate": 1.9051384808526595e-07, "loss": 0.4838, "step": 4060, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.4367816091954023, "success_rate.epoch.env.math": 0.8916761687571265, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7428642343013154, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4851397826845918, "success_rate.epoch.global": 0.6850040096230954, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4523809523809524, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9994904891304348, "tokens_p.mean_in_band": 0.5050027412280702, "tokens_rate.above_band": 0.9281210592686002, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07187894073139975 }, { "epoch": 0.6756981382978723, "grad_norm": 210.45913665194897, "learning_rate": 1.9049117339471675e-07, "loss": 0.4688, "step": 4065, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.43615494978479197, "success_rate.epoch.env.math": 0.8917995444191344, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7428713116786512, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4850946730144766, "success_rate.epoch.global": 0.6849863803877584, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.999384236453202, "tokens_p.mean_below_band": 6.693881005048752e-10, "tokens_p.mean_in_band": 0.6322443181818181, "tokens_rate.above_band": 0.9157894736842105, "tokens_rate.below_band": 0.0015037593984962407, "tokens_rate.in_band": 0.08270676691729323 }, { "epoch": 0.676529255319149, "grad_norm": 1694.8536667082376, "learning_rate": 1.9046848008490494e-07, "loss": 0.5703, "step": 4070, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.4357142857142857, "success_rate.epoch.env.math": 0.8919226393629124, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7430624380574826, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48508317821921326, "success_rate.epoch.global": 0.6850192061459667, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997749162479062, "tokens_p.mean_in_band": 0.5606897865853658, "tokens_rate.above_band": 0.9668016194331984, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03319838056680162 }, { "epoch": 0.6773603723404256, "grad_norm": 103.43872808155407, "learning_rate": 1.9044576819610567e-07, "loss": 0.5774, "step": 4075, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.4357142857142857, "success_rate.epoch.env.math": 0.8920454545454546, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7433803513981687, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48512324444859767, "success_rate.epoch.global": 0.6853213943076432, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9860248447204969, "tokens_p.mean_in_band": 0.6422991071428571, "tokens_rate.above_band": 0.9583333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041666666666666664 }, { "epoch": 0.6781914893617021, "grad_norm": 150.63548353513985, "learning_rate": 1.9042303776862693e-07, "loss": 0.5108, "step": 4080, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.43509272467902993, "success_rate.epoch.env.math": 0.8920454545454546, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7436974789915967, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4850955686811588, "success_rate.epoch.global": 0.6854632587859425, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990693573667712, "tokens_p.mean_in_band": 0.6998487903225806, "tokens_rate.above_band": 0.953662182361734, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04633781763826607 }, { "epoch": 0.6790226063829787, "grad_norm": 163.39862280927537, "learning_rate": 1.904002888428097e-07, "loss": 0.4932, "step": 4085, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.43509272467902993, "success_rate.epoch.env.math": 0.8920454545454546, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7435770750988142, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.485084622872724, "success_rate.epoch.global": 0.6854040242733951, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950079872204473, "tokens_p.mean_below_band": 9.255018085241318e-09, "tokens_p.mean_in_band": 0.7715692934782609, "tokens_rate.above_band": 0.9287833827893175, "tokens_rate.below_band": 0.002967359050445104, "tokens_rate.in_band": 0.06824925816023739 }, { "epoch": 0.6798537234042553, "grad_norm": 127.01809273104085, "learning_rate": 1.9037752145902772e-07, "loss": 0.4974, "step": 4090, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.4391891891891892, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.43509272467902993, "success_rate.epoch.env.math": 0.8920454545454546, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7437669711182424, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4851018861472175, "success_rate.epoch.global": 0.6855546687948922, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0001947040498442, "tokens_p.mean_in_band": 0.5637276785714286, "tokens_rate.above_band": 0.948301329394387, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.051698670605613 }, { "epoch": 0.6806848404255319, "grad_norm": 91.43701807536345, "learning_rate": 1.9035473565768752e-07, "loss": 0.6868, "step": 4095, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47297297297297297, "success_rate.epoch.env.logic": 0.43509272467902993, "success_rate.epoch.env.math": 0.8920454545454546, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.7436529455262509, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4848235585069515, "success_rate.epoch.global": 0.6854272959183674, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9899649784482759, "tokens_p.mean_in_band": 0.4842621026011561, "tokens_rate.above_band": 0.728414442700157, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.271585557299843 }, { "epoch": 0.6815159574468085, "grad_norm": 109.57505582230853, "learning_rate": 1.9033193147922832e-07, "loss": 0.5969, "step": 4100, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4697986577181208, "success_rate.epoch.env.logic": 0.43509272467902993, "success_rate.epoch.env.math": 0.8921679909194098, "success_rate.epoch.env.sat": 0.10266159695817491, "success_rate.epoch.env.science": 0.7438423645320197, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4845277223222736, "success_rate.epoch.global": 0.6854093660401401, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.994237385321101, "tokens_p.mean_in_band": 0.7079093992248062, "tokens_rate.above_band": 0.8941755537325676, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10582444626743231 }, { "epoch": 0.6823470744680851, "grad_norm": 82.24666777988774, "learning_rate": 1.9030910896412197e-07, "loss": 0.4774, "step": 4105, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4697986577181208, "success_rate.epoch.env.logic": 0.43509272467902993, "success_rate.epoch.env.math": 0.8922902494331065, "success_rate.epoch.env.sat": 0.10266159695817491, "success_rate.epoch.env.science": 0.7440315038149151, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4845560312128729, "success_rate.epoch.global": 0.6856096784463547, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9970738295318127, "tokens_p.mean_below_band": 2.7008354663848877e-07, "tokens_p.mean_in_band": 0.7067708333333333, "tokens_rate.above_band": 0.9476678043230944, "tokens_rate.below_band": 0.0011376564277588168, "tokens_rate.in_band": 0.051194539249146756 }, { "epoch": 0.6831781914893617, "grad_norm": 79.78910842548002, "learning_rate": 1.9028626815287283e-07, "loss": 0.5331, "step": 4110, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4697986577181208, "success_rate.epoch.env.logic": 0.43509272467902993, "success_rate.epoch.env.math": 0.8924122310305775, "success_rate.epoch.env.sat": 0.10266159695817491, "success_rate.epoch.env.science": 0.7441002949852508, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48457337419176444, "success_rate.epoch.global": 0.6857506361323156, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9860668789808917, "tokens_p.mean_in_band": 0.810302734375, "tokens_rate.above_band": 0.8971428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10285714285714286 }, { "epoch": 0.6840093085106383, "grad_norm": 84.37397367831892, "learning_rate": 1.9026340908601772e-07, "loss": 0.5112, "step": 4115, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4697986577181208, "success_rate.epoch.env.logic": 0.4358974358974359, "success_rate.epoch.env.math": 0.8927765237020316, "success_rate.epoch.env.sat": 0.10266159695817491, "success_rate.epoch.env.science": 0.7441631850577537, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4846853645519792, "success_rate.epoch.global": 0.6860003178134435, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939720812182741, "tokens_p.mean_in_band": 0.6252055921052632, "tokens_rate.above_band": 0.9120370370370371, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08796296296296297 }, { "epoch": 0.6848404255319149, "grad_norm": 104.32115444110507, "learning_rate": 1.9024053180412592e-07, "loss": 0.5015, "step": 4120, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4697986577181208, "success_rate.epoch.env.logic": 0.4358974358974359, "success_rate.epoch.env.math": 0.8927765237020316, "success_rate.epoch.env.sat": 0.10606060606060606, "success_rate.epoch.env.science": 0.744351669941061, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4850115003688646, "success_rate.epoch.global": 0.6861997776719072, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9893364928909952, "tokens_p.mean_in_band": 0.7386067708333334, "tokens_rate.above_band": 0.8978723404255319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10212765957446808 }, { "epoch": 0.6856715425531915, "grad_norm": 226.9499489073709, "learning_rate": 1.9021763634779897e-07, "loss": 0.5772, "step": 4125, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4697986577181208, "success_rate.epoch.env.logic": 0.4358974358974359, "success_rate.epoch.env.math": 0.8927765237020316, "success_rate.epoch.env.sat": 0.10566037735849057, "success_rate.epoch.env.science": 0.7445398773006136, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48499222570135886, "success_rate.epoch.global": 0.6862402793207427, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9940599173553719, "tokens_p.mean_in_band": 0.7047697368421053, "tokens_rate.above_band": 0.9502617801047121, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049738219895287955 }, { "epoch": 0.6865026595744681, "grad_norm": 100.18664374400898, "learning_rate": 1.9019472275767072e-07, "loss": 0.4131, "step": 4130, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4697986577181208, "success_rate.epoch.env.logic": 0.4358974358974359, "success_rate.epoch.env.math": 0.8927765237020316, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7447903898014219, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48497888870472744, "success_rate.epoch.global": 0.6863304789089756, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9918846899224806, "tokens_p.mean_in_band": 0.5349702380952381, "tokens_rate.above_band": 0.9247311827956989, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07526881720430108 }, { "epoch": 0.6873337765957447, "grad_norm": 40.22069364599915, "learning_rate": 1.9017179107440719e-07, "loss": 0.4693, "step": 4135, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47333333333333333, "success_rate.epoch.env.logic": 0.4366998577524893, "success_rate.epoch.env.math": 0.8928974069898534, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7451028403525954, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4854125642782966, "success_rate.epoch.global": 0.6867279062401014, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955968688845401, "tokens_p.mean_in_band": 0.7885044642857143, "tokens_rate.above_band": 0.9864864864864865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013513513513513514 }, { "epoch": 0.6881648936170213, "grad_norm": 183.06213638135222, "learning_rate": 1.9014884133870642e-07, "loss": 0.4633, "step": 4140, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47333333333333333, "success_rate.epoch.env.logic": 0.4366998577524893, "success_rate.epoch.env.math": 0.8928974069898534, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.7452276064610867, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48542390665179586, "success_rate.epoch.global": 0.6868271057631412, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982069672131147, "tokens_p.mean_below_band": 1.0408340855860843e-15, "tokens_p.mean_in_band": 0.5863486842105263, "tokens_rate.above_band": 0.9682539682539683, "tokens_rate.below_band": 0.0015873015873015873, "tokens_rate.in_band": 0.03015873015873016 }, { "epoch": 0.6889960106382979, "grad_norm": 71.36165659838801, "learning_rate": 1.9012587359129867e-07, "loss": 0.4756, "step": 4145, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47333333333333333, "success_rate.epoch.env.logic": 0.4366998577524893, "success_rate.epoch.env.math": 0.8930180180180181, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.7455389880224884, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48542733833838736, "success_rate.epoch.global": 0.6870156571247825, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9894153225806451, "tokens_p.mean_in_band": 0.6549030172413793, "tokens_rate.above_band": 0.8953068592057761, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10469314079422383 }, { "epoch": 0.6898271276595744, "grad_norm": 83.56963839508559, "learning_rate": 1.9010288787294602e-07, "loss": 0.3838, "step": 4150, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47333333333333333, "success_rate.epoch.env.logic": 0.4366998577524893, "success_rate.epoch.env.math": 0.8931383577052868, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.745849609375, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.485466516614731, "success_rate.epoch.global": 0.6873123716226892, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9927884615384616, "tokens_p.mean_below_band": 1.3322676295501878e-15, "tokens_p.mean_in_band": 0.6129807692307693, "tokens_rate.above_band": 0.8851063829787233, "tokens_rate.below_band": 0.00425531914893617, "tokens_rate.in_band": 0.11063829787234042 }, { "epoch": 0.690658244680851, "grad_norm": 73.73365539366097, "learning_rate": 1.900798842244425e-07, "loss": 0.4778, "step": 4155, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47333333333333333, "success_rate.epoch.env.logic": 0.4366998577524893, "success_rate.epoch.env.math": 0.8933782267115601, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.7459775719161384, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48549995584631395, "success_rate.epoch.global": 0.6875493135553101, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9882478632478633, "tokens_p.mean_in_band": 0.6502403846153846, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 0.6914893617021277, "grad_norm": 93.40037029657991, "learning_rate": 1.9005686268661393e-07, "loss": 0.417, "step": 4160, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47333333333333333, "success_rate.epoch.env.logic": 0.4366998577524893, "success_rate.epoch.env.math": 0.8934977578475336, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.7462250365319045, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48553331909647207, "success_rate.epoch.global": 0.6877956480605487, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9834710743801653, "tokens_p.mean_below_band": 8.003553375601768e-11, "tokens_p.mean_in_band": 0.7975260416666666, "tokens_rate.above_band": 0.9029850746268657, "tokens_rate.below_band": 0.007462686567164179, "tokens_rate.in_band": 0.08955223880597014 }, { "epoch": 0.6923204787234043, "grad_norm": 179.94449149617503, "learning_rate": 1.900338233003179e-07, "loss": 0.5748, "step": 4165, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47333333333333333, "success_rate.epoch.env.logic": 0.43607954545454547, "success_rate.epoch.env.math": 0.8934977578475336, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.7464103188123631, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48549377091306434, "success_rate.epoch.global": 0.6878348566025843, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.21428571428571427, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9920720443349754, "tokens_p.mean_in_band": 0.5764780405405405, "tokens_rate.above_band": 0.8144433299899699, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1855566700100301 }, { "epoch": 0.6931515957446809, "grad_norm": 84.06062585441798, "learning_rate": 1.9001076610644368e-07, "loss": 0.5008, "step": 4170, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43607954545454547, "success_rate.epoch.env.math": 0.8934977578475336, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.7466569414053003, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48583326921420833, "success_rate.epoch.global": 0.6880806172256337, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976099426386233, "tokens_p.mean_in_band": 0.7514204545454546, "tokens_rate.above_band": 0.9794007490636704, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020599250936329586 }, { "epoch": 0.6939827127659575, "grad_norm": 76.2497679839275, "learning_rate": 1.8998769114591218e-07, "loss": 0.472, "step": 4175, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43607954545454547, "success_rate.epoch.env.math": 0.8936170212765957, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.7466601894583434, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48580883372009254, "success_rate.epoch.global": 0.6880604058518169, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9889914772727273, "tokens_p.mean_in_band": 0.6758643617021277, "tokens_rate.above_band": 0.7892376681614349, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.21076233183856502 }, { "epoch": 0.694813829787234, "grad_norm": 54.30070630849187, "learning_rate": 1.8996459845967575e-07, "loss": 0.501, "step": 4180, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.4376770538243626, "success_rate.epoch.env.math": 0.8936170212765957, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.7467248908296943, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48595994369656237, "success_rate.epoch.global": 0.6882463859208046, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7619047619047619, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9958202030812325, "tokens_p.mean_in_band": 0.5803571428571429, "tokens_rate.above_band": 0.9622641509433962, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03773584905660377 }, { "epoch": 0.6956449468085106, "grad_norm": 84.67462063163026, "learning_rate": 1.8994148808871827e-07, "loss": 0.6198, "step": 4185, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43847241867043846, "success_rate.epoch.env.math": 0.8936170212765957, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.747031742185607, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48602483675043007, "success_rate.epoch.global": 0.6884319573065453, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9934090909090909, "tokens_p.mean_in_band": 0.6356026785714286, "tokens_rate.above_band": 0.8870967741935484, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11290322580645161 }, { "epoch": 0.6964760638297872, "grad_norm": 92.51553447457067, "learning_rate": 1.89918360074055e-07, "loss": 0.4158, "step": 4190, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43723554301833567, "success_rate.epoch.env.math": 0.8936170212765957, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.7471542746427706, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4859235328236174, "success_rate.epoch.global": 0.6883137254901961, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.48888888888888893, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.998882393397524, "tokens_p.mean_below_band": 5.617039278149605e-09, "tokens_p.mean_in_band": 0.6546415441176471, "tokens_rate.above_band": 0.9540682414698163, "tokens_rate.below_band": 0.0013123359580052493, "tokens_rate.in_band": 0.04461942257217848 }, { "epoch": 0.6973071808510638, "grad_norm": 94.02753634138327, "learning_rate": 1.8989521445673248e-07, "loss": 0.4481, "step": 4195, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43723554301833567, "success_rate.epoch.env.math": 0.8936170212765957, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.7473989837890153, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48594577910963965, "success_rate.epoch.global": 0.6885091707164133, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9917613636363637, "tokens_p.mean_in_band": 0.6403245192307693, "tokens_rate.above_band": 0.8286252354048964, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1713747645951036 }, { "epoch": 0.6981382978723404, "grad_norm": 111.91926376542496, "learning_rate": 1.898720512778285e-07, "loss": 0.4445, "step": 4200, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43723554301833567, "success_rate.epoch.env.math": 0.8936170212765957, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.7477042049299178, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4859735264860853, "success_rate.epoch.global": 0.6887531328320802, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969533426183844, "tokens_p.mean_in_band": 0.4327392578125, "tokens_rate.above_band": 0.9573333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042666666666666665 }, { "epoch": 0.698969414893617, "grad_norm": 132.50047862122815, "learning_rate": 1.8984887057845206e-07, "loss": 0.4653, "step": 4205, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.436241610738255, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43723554301833567, "success_rate.epoch.env.math": 0.8937360178970917, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.7478260869565218, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48599542454491257, "success_rate.epoch.global": 0.6888993267574761, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.984375, "tokens_p.mean_in_band": 0.7405598958333334, "tokens_rate.above_band": 0.9047619047619048, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09523809523809523 }, { "epoch": 0.6998005319148937, "grad_norm": 131.29949345986978, "learning_rate": 1.8982567239974322e-07, "loss": 0.4596, "step": 4210, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43333333333333335, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.4388185654008439, "success_rate.epoch.env.math": 0.8938547486033519, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.7480694980694981, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48590786879916925, "success_rate.epoch.global": 0.6891321344800625, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9941546762589928, "tokens_p.mean_in_band": 0.5889756944444444, "tokens_rate.above_band": 0.9686411149825784, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0313588850174216 }, { "epoch": 0.7006316489361702, "grad_norm": 112.48530892295777, "learning_rate": 1.8980245678287304e-07, "loss": 0.6103, "step": 4215, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43333333333333335, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.4388185654008439, "success_rate.epoch.env.math": 0.8940914158305463, "success_rate.epoch.env.sat": 0.1037037037037037, "success_rate.epoch.env.science": 0.7483124397299904, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48591642273194524, "success_rate.epoch.global": 0.6893158388003748, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9905849358974359, "tokens_p.mean_in_band": 0.62984375, "tokens_rate.above_band": 0.861878453038674, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13812154696132597 }, { "epoch": 0.7014627659574468, "grad_norm": 136.60351204631363, "learning_rate": 1.8977922376904354e-07, "loss": 0.5868, "step": 4220, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43333333333333335, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43820224719101125, "success_rate.epoch.env.math": 0.8944444444444445, "success_rate.epoch.env.sat": 0.1037037037037037, "success_rate.epoch.env.science": 0.7484337349397591, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4859035141513848, "success_rate.epoch.global": 0.6894506866416978, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9943605834464043, "tokens_p.mean_in_band": 0.5211693548387096, "tokens_rate.above_band": 0.9596354166666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040364583333333336 }, { "epoch": 0.7022938829787234, "grad_norm": 113.51386691917365, "learning_rate": 1.897559733994877e-07, "loss": 0.7681, "step": 4225, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43333333333333335, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43820224719101125, "success_rate.epoch.env.math": 0.8945615982241953, "success_rate.epoch.env.sat": 0.1033210332103321, "success_rate.epoch.env.science": 0.7483156881616939, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48586864474304964, "success_rate.epoch.global": 0.6893703241895262, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.990530303030303, "tokens_p.mean_in_band": 0.6213942307692307, "tokens_rate.above_band": 0.8638743455497382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13612565445026178 }, { "epoch": 0.703125, "grad_norm": 74.17269979025768, "learning_rate": 1.8973270571546913e-07, "loss": 0.551, "step": 4230, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43333333333333335, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43899018232819076, "success_rate.epoch.env.math": 0.8947951273532669, "success_rate.epoch.env.sat": 0.1033210332103321, "success_rate.epoch.env.science": 0.748618120644076, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4859889989929254, "success_rate.epoch.global": 0.6897571606475716, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921183628318584, "tokens_p.mean_below_band": 8.149072527885437e-09, "tokens_p.mean_in_band": 0.7651909722222222, "tokens_rate.above_band": 0.9576271186440678, "tokens_rate.below_band": 0.00423728813559322, "tokens_rate.in_band": 0.038135593220338986 }, { "epoch": 0.7039561170212766, "grad_norm": 257.42223166839324, "learning_rate": 1.8970942075828233e-07, "loss": 0.432, "step": 4235, "success_rate.epoch.env.abd": 0.42857142857142855, "success_rate.epoch.env.agentgym:alfworld": 0.43333333333333335, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.438375350140056, "success_rate.epoch.env.math": 0.8950276243093923, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.7487388902233966, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4859306878663405, "success_rate.epoch.global": 0.6897356143079316, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9962412587412588, "tokens_p.mean_in_band": 0.6454454787234043, "tokens_rate.above_band": 0.9383202099737533, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06167979002624672 }, { "epoch": 0.7047872340425532, "grad_norm": 115.5946326771729, "learning_rate": 1.8968611856925244e-07, "loss": 0.5261, "step": 4240, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.43333333333333335, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43916083916083914, "success_rate.epoch.env.math": 0.8951434878587197, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.7486194477791116, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4872098648364021, "success_rate.epoch.global": 0.6898212898212899, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960069444444445, "tokens_p.mean_in_band": 0.6231770833333333, "tokens_rate.above_band": 0.96, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04 }, { "epoch": 0.7056183510638298, "grad_norm": 106.75897843806494, "learning_rate": 1.89662799189735e-07, "loss": 0.4994, "step": 4245, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.43333333333333335, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43916083916083914, "success_rate.epoch.env.math": 0.8952590959206174, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.7489208633093525, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48724777607205105, "success_rate.epoch.global": 0.6901102313305387, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953435430463576, "tokens_p.mean_in_band": 0.7045200892857143, "tokens_rate.above_band": 0.9556962025316456, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04430379746835443 }, { "epoch": 0.7064494680851063, "grad_norm": 89.77211511640985, "learning_rate": 1.896394626611163e-07, "loss": 0.5757, "step": 4250, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4370860927152318, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43854748603351956, "success_rate.epoch.env.math": 0.8953744493392071, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.7491013659237958, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48756007264365214, "success_rate.epoch.global": 0.6902435241197457, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970170454545455, "tokens_p.mean_in_band": 0.5926339285714286, "tokens_rate.above_band": 0.9617486338797814, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03825136612021858 }, { "epoch": 0.707280585106383, "grad_norm": 104.68640032224654, "learning_rate": 1.89616109024813e-07, "loss": 0.5432, "step": 4255, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.43854748603351956, "success_rate.epoch.env.math": 0.8953744493392071, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.749341632750778, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4879185860796485, "success_rate.epoch.global": 0.6904835709857409, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949090121317158, "tokens_p.mean_below_band": 4.1443854570388794e-08, "tokens_p.mean_in_band": 0.8020833333333334, "tokens_rate.above_band": 0.9931153184165232, "tokens_rate.below_band": 0.0017211703958691911, "tokens_rate.in_band": 0.0051635111876075735 }, { "epoch": 0.7081117021276596, "grad_norm": 97.49138747304247, "learning_rate": 1.8959273832227197e-07, "loss": 0.4554, "step": 4260, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4768211920529801, "success_rate.epoch.env.logic": 0.4393305439330544, "success_rate.epoch.env.math": 0.8954895489548955, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.7495215311004785, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48801659115827795, "success_rate.epoch.global": 0.6907232460895153, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9922839506172839, "tokens_p.mean_in_band": 0.7349330357142857, "tokens_rate.above_band": 0.9455252918287937, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054474708171206226 }, { "epoch": 0.7089428191489362, "grad_norm": 67.56734200042959, "learning_rate": 1.8956935059497052e-07, "loss": 0.3633, "step": 4265, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48026315789473684, "success_rate.epoch.env.logic": 0.43949930458970793, "success_rate.epoch.env.math": 0.8956043956043956, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.7496413199426112, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4883661695210092, "success_rate.epoch.global": 0.6908556397957605, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992973372781065, "tokens_p.mean_in_band": 0.6189236111111112, "tokens_rate.above_band": 0.9690366972477065, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03096330275229358 }, { "epoch": 0.7097739361702128, "grad_norm": 64.03939956288343, "learning_rate": 1.8954594588441617e-07, "loss": 0.591, "step": 4270, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48026315789473684, "success_rate.epoch.env.logic": 0.43949930458970793, "success_rate.epoch.env.math": 0.8956043956043956, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.75, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48836449735290943, "success_rate.epoch.global": 0.6910355486862442, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965277777777778, "tokens_p.mean_in_band": 0.6300951086956522, "tokens_rate.above_band": 0.9361997226074896, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0638002773925104 }, { "epoch": 0.7106050531914894, "grad_norm": 148.01650939739176, "learning_rate": 1.8952252423214652e-07, "loss": 0.3919, "step": 4275, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48026315789473684, "success_rate.epoch.env.logic": 0.43949930458970793, "success_rate.epoch.env.math": 0.8957189901207464, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.7501193317422434, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.488385763376418, "success_rate.epoch.global": 0.6911787424687162, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9889705882352942, "tokens_p.mean_in_band": 0.6166294642857143, "tokens_rate.above_band": 0.9132231404958677, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08677685950413223 }, { "epoch": 0.711436170212766, "grad_norm": 51.177074398787305, "learning_rate": 1.894990856797293e-07, "loss": 0.6083, "step": 4280, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.2857142857142857, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48026315789473684, "success_rate.epoch.env.logic": 0.43949930458970793, "success_rate.epoch.env.math": 0.895947426067908, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.750059623181493, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4884011022297281, "success_rate.epoch.global": 0.6912627354121642, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.98828125, "tokens_p.mean_in_band": 0.5224609375, "tokens_rate.above_band": 0.8947368421052632, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10526315789473684 }, { "epoch": 0.7122672872340425, "grad_norm": 184.75490898945247, "learning_rate": 1.8947563026876213e-07, "loss": 0.5772, "step": 4285, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48026315789473684, "success_rate.epoch.env.logic": 0.4388888888888889, "success_rate.epoch.env.math": 0.895947426067908, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.7502978317846081, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4866356634892442, "success_rate.epoch.global": 0.6912399753238742, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9946268459915611, "tokens_p.mean_in_band": 0.7361111111111112, "tokens_rate.above_band": 0.9546827794561934, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045317220543806644 }, { "epoch": 0.7130984042553191, "grad_norm": 86.46547925366191, "learning_rate": 1.8945215804087266e-07, "loss": 0.6543, "step": 4290, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.477124183006536, "success_rate.epoch.env.logic": 0.43966712898751736, "success_rate.epoch.env.math": 0.895947426067908, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.7503569728700619, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48642642769796063, "success_rate.epoch.global": 0.6912648282236944, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9849943374858438, "tokens_p.mean_in_band": 0.5704105062724014, "tokens_rate.above_band": 0.7598967297762479, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.24010327022375216 }, { "epoch": 0.7139295212765957, "grad_norm": 94.08215311191753, "learning_rate": 1.8942866903771834e-07, "loss": 0.5902, "step": 4295, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.477124183006536, "success_rate.epoch.env.logic": 0.43966712898751736, "success_rate.epoch.env.math": 0.895947426067908, "success_rate.epoch.env.sat": 0.10181818181818182, "success_rate.epoch.env.science": 0.7504757373929591, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48636941349586765, "success_rate.epoch.global": 0.6911470361816782, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.35714285714285715, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9934413580246914, "tokens_p.mean_below_band": 5.502442945726216e-11, "tokens_p.mean_in_band": 0.6466619318181818, "tokens_rate.above_band": 0.8127090301003345, "tokens_rate.below_band": 0.0033444816053511705, "tokens_rate.in_band": 0.18394648829431437 }, { "epoch": 0.7147606382978723, "grad_norm": 159.40994791261662, "learning_rate": 1.894051633009864e-07, "loss": 0.4786, "step": 4300, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.477124183006536, "success_rate.epoch.env.logic": 0.43966712898751736, "success_rate.epoch.env.math": 0.8960612691466083, "success_rate.epoch.env.sat": 0.10181818181818182, "success_rate.epoch.env.science": 0.7505938242280285, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48639049803348305, "success_rate.epoch.global": 0.6913257459243309, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.990234375, "tokens_p.mean_below_band": 2.514570951461792e-08, "tokens_p.mean_in_band": 0.6315104166666666, "tokens_rate.above_band": 0.927536231884058, "tokens_rate.below_band": 0.007246376811594203, "tokens_rate.in_band": 0.06521739130434782 }, { "epoch": 0.715591755319149, "grad_norm": 84.0879312020626, "learning_rate": 1.893816408723938e-07, "loss": 0.553, "step": 4305, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.477124183006536, "success_rate.epoch.env.logic": 0.43966712898751736, "success_rate.epoch.env.math": 0.8960612691466083, "success_rate.epoch.env.sat": 0.10144927536231885, "success_rate.epoch.env.science": 0.7507714217896986, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4863731063158292, "success_rate.epoch.global": 0.691361819858592, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.3666666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9970407196969697, "tokens_p.mean_in_band": 0.6302734375, "tokens_rate.above_band": 0.9406175771971497, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05938242280285035 }, { "epoch": 0.7164228723404256, "grad_norm": 73.02164494461557, "learning_rate": 1.8935810179368701e-07, "loss": 0.4041, "step": 4310, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4805194805194805, "success_rate.epoch.env.logic": 0.43966712898751736, "success_rate.epoch.env.math": 0.8960612691466083, "success_rate.epoch.env.sat": 0.10108303249097472, "success_rate.epoch.env.science": 0.7511258592083432, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4866806965031241, "success_rate.epoch.global": 0.6915873503223826, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9951517706576728, "tokens_p.mean_in_band": 0.7201334635416666, "tokens_rate.above_band": 0.9251170046801872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0748829953198128 }, { "epoch": 0.7172539893617021, "grad_norm": 78.4239188360767, "learning_rate": 1.8933454610664218e-07, "loss": 0.4876, "step": 4315, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4805194805194805, "success_rate.epoch.env.logic": 0.4404432132963989, "success_rate.epoch.env.math": 0.8961748633879781, "success_rate.epoch.env.sat": 0.10108303249097472, "success_rate.epoch.env.science": 0.7514204545454546, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48678835776561163, "success_rate.epoch.global": 0.691918417420641, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9916460396039604, "tokens_p.mean_in_band": 0.630859375, "tokens_rate.above_band": 0.9181818181818182, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08181818181818182 }, { "epoch": 0.7180851063829787, "grad_norm": 98.40533914804605, "learning_rate": 1.8931097385306486e-07, "loss": 0.8607, "step": 4320, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4805194805194805, "success_rate.epoch.env.logic": 0.43983402489626555, "success_rate.epoch.env.math": 0.8961748633879781, "success_rate.epoch.env.sat": 0.10108303249097472, "success_rate.epoch.env.science": 0.7514191106906338, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4867328548333431, "success_rate.epoch.global": 0.6918479926448053, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9944371727748691, "tokens_p.mean_in_band": 0.6472981770833334, "tokens_rate.above_band": 0.9408866995073891, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.059113300492610835 }, { "epoch": 0.7189162234042553, "grad_norm": 103.58482178009173, "learning_rate": 1.8928738507479003e-07, "loss": 0.4619, "step": 4325, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4805194805194805, "success_rate.epoch.env.logic": 0.43983402489626555, "success_rate.epoch.env.math": 0.8962882096069869, "success_rate.epoch.env.sat": 0.10108303249097472, "success_rate.epoch.env.science": 0.7515366430260048, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48675384379283226, "success_rate.epoch.global": 0.6919895849287793, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976796998420221, "tokens_p.mean_in_band": 0.5546875, "tokens_rate.above_band": 0.960546282245827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03945371775417299 }, { "epoch": 0.7197473404255319, "grad_norm": 76.54371369058103, "learning_rate": 1.8926377981368202e-07, "loss": 0.5284, "step": 4330, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4805194805194805, "success_rate.epoch.env.logic": 0.4413793103448276, "success_rate.epoch.env.math": 0.8964013086150491, "success_rate.epoch.env.sat": 0.10071942446043165, "success_rate.epoch.env.science": 0.7514177693761814, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4868607440452193, "success_rate.epoch.global": 0.6919663351185922, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.4333333333333333, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.992442936458976, "tokens_p.mean_below_band": 4.4648186303675175e-07, "tokens_p.mean_in_band": 0.5511291348600509, "tokens_rate.above_band": 0.8040674603174603, "tokens_rate.below_band": 0.000992063492063492, "tokens_rate.in_band": 0.1949404761904762 }, { "epoch": 0.7205784574468085, "grad_norm": 84.83162136887483, "learning_rate": 1.8924015811163437e-07, "loss": 0.5044, "step": 4335, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4838709677419355, "success_rate.epoch.env.logic": 0.44214876033057854, "success_rate.epoch.env.math": 0.8964013086150491, "success_rate.epoch.env.sat": 0.1003584229390681, "success_rate.epoch.env.science": 0.751357733175915, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4871970985439989, "success_rate.epoch.global": 0.6919431279620853, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.9891377005347594, "tokens_p.mean_in_band": 0.7549913194444444, "tokens_rate.above_band": 0.8862559241706162, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11374407582938388 }, { "epoch": 0.7214095744680851, "grad_norm": 73.98248086914344, "learning_rate": 1.8921652001056978e-07, "loss": 0.4356, "step": 4340, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4407894736842105, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4838709677419355, "success_rate.epoch.env.logic": 0.44214876033057854, "success_rate.epoch.env.math": 0.8965141612200436, "success_rate.epoch.env.sat": 0.1003584229390681, "success_rate.epoch.env.science": 0.7513564519933946, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48720724140058747, "success_rate.epoch.global": 0.692025664527956, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955561926605505, "tokens_p.mean_in_band": 0.42373511904761907, "tokens_rate.above_band": 0.9688888888888889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03111111111111111 }, { "epoch": 0.7222406914893617, "grad_norm": 67.93868482693091, "learning_rate": 1.8919286555244013e-07, "loss": 0.4441, "step": 4345, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4838709677419355, "success_rate.epoch.env.logic": 0.44214876033057854, "success_rate.epoch.env.math": 0.8965141612200436, "success_rate.epoch.env.sat": 0.1003584229390681, "success_rate.epoch.env.science": 0.7514150943396226, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.487544842592084, "success_rate.epoch.global": 0.6921197312156384, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9975308641975309, "tokens_p.mean_in_band": 0.8370535714285714, "tokens_rate.above_band": 0.9830097087378641, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01699029126213592 }, { "epoch": 0.7230718085106383, "grad_norm": 159.20782865461274, "learning_rate": 1.8916919477922627e-07, "loss": 0.5283, "step": 4350, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.44214876033057854, "success_rate.epoch.env.math": 0.8967391304347826, "success_rate.epoch.env.sat": 0.1003584229390681, "success_rate.epoch.env.science": 0.7515322960867515, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48787672353748585, "success_rate.epoch.global": 0.6923546467266901, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7083333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9940713431646074, "tokens_p.mean_below_band": 3.259629011154175e-07, "tokens_p.mean_in_band": 0.50625, "tokens_rate.above_band": 0.8928825622775801, "tokens_rate.below_band": 0.00035587188612099647, "tokens_rate.in_band": 0.10676156583629894 }, { "epoch": 0.7239029255319149, "grad_norm": 267.6183368942195, "learning_rate": 1.8914550773293804e-07, "loss": 0.4552, "step": 4355, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.44805194805194803, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.44214876033057854, "success_rate.epoch.env.math": 0.8970747562296858, "success_rate.epoch.env.sat": 0.099644128113879, "success_rate.epoch.env.science": 0.7513551732264907, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48815415187539096, "success_rate.epoch.global": 0.6922256097560976, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9950810185185185, "tokens_p.mean_in_band": 0.5784801136363636, "tokens_rate.above_band": 0.907563025210084, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09243697478991597 }, { "epoch": 0.7247340425531915, "grad_norm": 76.93213741137572, "learning_rate": 1.8912180445561412e-07, "loss": 0.5652, "step": 4360, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.44805194805194803, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.44214876033057854, "success_rate.epoch.env.math": 0.8971861471861472, "success_rate.epoch.env.sat": 0.099644128113879, "success_rate.epoch.env.science": 0.7512941176470588, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48815872781875724, "success_rate.epoch.global": 0.6922959805115713, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9865384615384616, "tokens_p.mean_in_band": 0.6928453947368421, "tokens_rate.above_band": 0.87248322147651, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12751677852348994 }, { "epoch": 0.7255651595744681, "grad_norm": 81.75392936680419, "learning_rate": 1.8909808498932205e-07, "loss": 0.6424, "step": 4365, "success_rate.epoch.env.abd": 0.4418604651162791, "success_rate.epoch.env.agentgym:alfworld": 0.44805194805194803, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4840764331210191, "success_rate.epoch.env.logic": 0.44154057771664373, "success_rate.epoch.env.math": 0.8971861471861472, "success_rate.epoch.env.sat": 0.099644128113879, "success_rate.epoch.env.science": 0.7513513513513513, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48782654573074724, "success_rate.epoch.global": 0.6921673003802281, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.625, "success_rate.window.env_macro_mean": 0.20833333333333334, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9932397959183673, "tokens_p.mean_in_band": 0.6305118865030674, "tokens_rate.above_band": 0.8825648414985591, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11743515850144093 }, { "epoch": 0.7263962765957447, "grad_norm": 667.9185564578713, "learning_rate": 1.8907434937615803e-07, "loss": 0.4817, "step": 4370, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.44805194805194803, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48427672955974843, "success_rate.epoch.env.logic": 0.44154057771664373, "success_rate.epoch.env.math": 0.8974082073434125, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7514681700728212, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48899661998298405, "success_rate.epoch.global": 0.6922375816497038, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9915844298245614, "tokens_p.mean_in_band": 0.5990010245901639, "tokens_rate.above_band": 0.8236994219653179, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17630057803468208 }, { "epoch": 0.7272273936170213, "grad_norm": 106.54796761053292, "learning_rate": 1.89050597658247e-07, "loss": 0.3869, "step": 4375, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.44805194805194803, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48427672955974843, "success_rate.epoch.env.logic": 0.44154057771664373, "success_rate.epoch.env.math": 0.8974082073434125, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7513500821789152, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4889858847199017, "success_rate.epoch.global": 0.6921791951404708, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9954941860465116, "tokens_p.mean_in_band": 0.6475929054054054, "tokens_rate.above_band": 0.9457478005865103, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054252199413489736 }, { "epoch": 0.7280585106382979, "grad_norm": 100.99380518293161, "learning_rate": 1.8902682987774242e-07, "loss": 0.3401, "step": 4380, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.44805194805194803, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48427672955974843, "success_rate.epoch.env.logic": 0.44154057771664373, "success_rate.epoch.env.math": 0.8975188781014024, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7513488153882243, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48899583053511064, "success_rate.epoch.global": 0.6922610015174507, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9900442477876106, "tokens_p.mean_in_band": 0.730078125, "tokens_rate.above_band": 0.9576271186440678, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0423728813559322 }, { "epoch": 0.7288896276595744, "grad_norm": 79.44632920665005, "learning_rate": 1.8900304607682626e-07, "loss": 0.5829, "step": 4385, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.44805194805194803, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48427672955974843, "success_rate.epoch.env.logic": 0.4409340659340659, "success_rate.epoch.env.math": 0.8976293103448276, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7514071294559099, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48895603367406826, "success_rate.epoch.global": 0.6922493553769149, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4166666666666667, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9933271670190275, "tokens_p.mean_in_band": 0.6509507123161765, "tokens_rate.above_band": 0.912540192926045, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08745980707395498 }, { "epoch": 0.729720744680851, "grad_norm": 62.61111910390177, "learning_rate": 1.8897924629770898e-07, "loss": 0.3829, "step": 4390, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.44805194805194803, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48427672955974843, "success_rate.epoch.env.logic": 0.44170096021947874, "success_rate.epoch.env.math": 0.8976293103448276, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7515236755743084, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4890363464380511, "success_rate.epoch.global": 0.6923893268647665, "success_rate.window.env.logic": 0.8333333333333334, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995286673553719, "tokens_p.mean_in_band": 0.6028262867647058, "tokens_rate.above_band": 0.9660678642714571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033932135728542916 }, { "epoch": 0.7305518617021277, "grad_norm": 86.15968875375953, "learning_rate": 1.8895543058262932e-07, "loss": 0.5256, "step": 4395, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.44805194805194803, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48427672955974843, "success_rate.epoch.env.logic": 0.44170096021947874, "success_rate.epoch.env.math": 0.8966630785791173, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.751580426129712, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48895366632802323, "success_rate.epoch.global": 0.6923659497122084, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9928678678678678, "tokens_p.mean_in_band": 0.6512784090909091, "tokens_rate.above_band": 0.9380281690140845, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061971830985915494 }, { "epoch": 0.7313829787234043, "grad_norm": 186.37100709797232, "learning_rate": 1.8893159897385438e-07, "loss": 0.6695, "step": 4400, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48427672955974843, "success_rate.epoch.env.logic": 0.4410958904109589, "success_rate.epoch.env.math": 0.896774193548387, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7514626725953663, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4892217796735016, "success_rate.epoch.global": 0.6922960496443167, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9965999388753056, "tokens_p.mean_below_band": 1.8533319234848022e-07, "tokens_p.mean_in_band": 0.509521484375, "tokens_rate.above_band": 0.9522700814901047, "tokens_rate.below_band": 0.0011641443538998836, "tokens_rate.in_band": 0.046565774155995346 }, { "epoch": 0.7322140957446809, "grad_norm": 101.93626897576901, "learning_rate": 1.8890775151367938e-07, "loss": 0.6065, "step": 4405, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48125, "success_rate.epoch.env.logic": 0.4410958904109589, "success_rate.epoch.env.math": 0.896774193548387, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.751578947368421, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48895719287471134, "success_rate.epoch.global": 0.6922844175491679, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.21428571428571427, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9967196132596685, "tokens_p.mean_in_band": 0.6408808679039302, "tokens_rate.above_band": 0.92672, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07328 }, { "epoch": 0.7330452127659575, "grad_norm": 117.9752160742047, "learning_rate": 1.8888388824442777e-07, "loss": 0.4082, "step": 4410, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48125, "success_rate.epoch.env.logic": 0.4410958904109589, "success_rate.epoch.env.math": 0.896774193548387, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.751927119831815, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48898884491683803, "success_rate.epoch.global": 0.6925634824667473, "success_rate.window.env.science": 0.9, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9879807692307693, "tokens_p.mean_in_band": 0.7007211538461539, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 0.733876329787234, "grad_norm": 130.0204194839608, "learning_rate": 1.88860009208451e-07, "loss": 0.343, "step": 4415, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48125, "success_rate.epoch.env.logic": 0.4410958904109589, "success_rate.epoch.env.math": 0.8968850698174007, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7522165188987401, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4890252335837415, "success_rate.epoch.global": 0.692842041679251, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974424552429667, "tokens_p.mean_in_band": 0.5304418103448276, "tokens_rate.above_band": 0.9642416769420469, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035758323057953144 }, { "epoch": 0.7347074468085106, "grad_norm": 240.71635052843217, "learning_rate": 1.8883611444812845e-07, "loss": 0.5749, "step": 4420, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48125, "success_rate.epoch.env.logic": 0.4418604651162791, "success_rate.epoch.env.math": 0.8968850698174007, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7524475524475525, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48911574342502634, "success_rate.epoch.global": 0.6930737890448166, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9913194444444444, "tokens_p.mean_in_band": 0.7491319444444444, "tokens_rate.above_band": 0.9523809523809523, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047619047619047616 }, { "epoch": 0.7355385638297872, "grad_norm": 94.6305726076469, "learning_rate": 1.888122040058675e-07, "loss": 0.4512, "step": 4425, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.484472049689441, "success_rate.epoch.env.logic": 0.4426229508196721, "success_rate.epoch.env.math": 0.8971061093247589, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7526205450733753, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4895137950183004, "success_rate.epoch.global": 0.6933976484775399, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9855401529636711, "tokens_p.mean_in_band": 0.854375, "tokens_rate.above_band": 0.9543795620437956, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04562043795620438 }, { "epoch": 0.7363696808510638, "grad_norm": 109.1126549076257, "learning_rate": 1.8878827792410335e-07, "loss": 0.4643, "step": 4430, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.484472049689441, "success_rate.epoch.env.logic": 0.444141689373297, "success_rate.epoch.env.math": 0.8971061093247589, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7529658060013956, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48968324951663184, "success_rate.epoch.global": 0.6937669376693767, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9936740890688259, "tokens_p.mean_in_band": 0.825390625, "tokens_rate.above_band": 0.9610894941634242, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038910505836575876 }, { "epoch": 0.7372007978723404, "grad_norm": 137.67767526394704, "learning_rate": 1.887643362452989e-07, "loss": 0.7313, "step": 4435, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.484472049689441, "success_rate.epoch.env.logic": 0.444141689373297, "success_rate.epoch.env.math": 0.8971061093247589, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.7532527881040892, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4897093387986949, "success_rate.epoch.global": 0.6939972920114337, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9976587947882736, "tokens_p.mean_in_band": 0.5655184659090909, "tokens_rate.above_band": 0.9702031602708804, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029796839729119638 }, { "epoch": 0.738031914893617, "grad_norm": 75.32472949630618, "learning_rate": 1.8874037901194479e-07, "loss": 0.4643, "step": 4440, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.484472049689441, "success_rate.epoch.env.logic": 0.444141689373297, "success_rate.epoch.env.math": 0.8972162740899358, "success_rate.epoch.env.sat": 0.0989399293286219, "success_rate.epoch.env.science": 0.7534818941504178, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4897082860712661, "success_rate.epoch.global": 0.694122952051706, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9994261268781303, "tokens_p.mean_in_band": 0.5360169491525424, "tokens_rate.above_band": 0.9530628480509149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046937151949085126 }, { "epoch": 0.7388630319148937, "grad_norm": 132.7806959351578, "learning_rate": 1.8871640626655923e-07, "loss": 0.3691, "step": 4445, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.484472049689441, "success_rate.epoch.env.logic": 0.4448979591836735, "success_rate.epoch.env.math": 0.8975453575240128, "success_rate.epoch.env.sat": 0.0989399293286219, "success_rate.epoch.env.science": 0.7534214799350498, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4898014623466374, "success_rate.epoch.global": 0.6942942942942943, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9925411522633745, "tokens_p.mean_in_band": 0.7430555555555556, "tokens_rate.above_band": 0.9310344827586207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06896551724137931 }, { "epoch": 0.7396941489361702, "grad_norm": 64.22165100729973, "learning_rate": 1.8869241805168795e-07, "loss": 0.5711, "step": 4450, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.484472049689441, "success_rate.epoch.env.logic": 0.44429347826086957, "success_rate.epoch.env.math": 0.8976545842217484, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.753592953175707, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.489740356799614, "success_rate.epoch.global": 0.6942694269426942, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9988425925925926, "tokens_p.mean_in_band": 0.625, "tokens_rate.above_band": 0.9253365973072215, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07466340269277846 }, { "epoch": 0.7405252659574468, "grad_norm": 114.42536682688672, "learning_rate": 1.886684144099042e-07, "loss": 0.6166, "step": 4455, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.484472049689441, "success_rate.epoch.env.logic": 0.44369063772048845, "success_rate.epoch.env.math": 0.8976545842217484, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.7537071362372567, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4896959333924475, "success_rate.epoch.global": 0.6942570100464838, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.3333333333333333, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.999859865470852, "tokens_p.mean_in_band": 0.5884989754098361, "tokens_rate.above_band": 0.9164383561643835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08356164383561644 }, { "epoch": 0.7413563829787234, "grad_norm": 130.77393913351526, "learning_rate": 1.8864439538380853e-07, "loss": 0.4507, "step": 4460, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48466257668711654, "success_rate.epoch.env.logic": 0.4444444444444444, "success_rate.epoch.env.math": 0.8976545842217484, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.7539351851851852, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4898025136351347, "success_rate.epoch.global": 0.6944278010784901, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993574236468098, "tokens_p.mean_in_band": 0.7829953457446809, "tokens_rate.above_band": 0.9859868813357185, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014013118664281454 }, { "epoch": 0.7421875, "grad_norm": 66.2519139594964, "learning_rate": 1.8862036101602884e-07, "loss": 0.4097, "step": 4465, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48466257668711654, "success_rate.epoch.env.logic": 0.4438430311231394, "success_rate.epoch.env.math": 0.8976545842217484, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.7541059449456397, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48976336331142106, "success_rate.epoch.global": 0.6944610778443113, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45999999999999996, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9944545454545455, "tokens_p.mean_in_band": 0.6390143060064936, "tokens_rate.above_band": 0.8992805755395683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10071942446043165 }, { "epoch": 0.7430186170212766, "grad_norm": 70.04259190456737, "learning_rate": 1.8859631134922022e-07, "loss": 0.4983, "step": 4470, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48466257668711654, "success_rate.epoch.env.logic": 0.4438430311231394, "success_rate.epoch.env.math": 0.8976545842217484, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.7544467544467545, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.48979434599334054, "success_rate.epoch.global": 0.6947352677236015, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975476457399103, "tokens_p.mean_in_band": 0.6392299107142857, "tokens_rate.above_band": 0.9695652173913043, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030434782608695653 }, { "epoch": 0.7438497340425532, "grad_norm": 84.71976278032501, "learning_rate": 1.8857224642606498e-07, "loss": 0.4106, "step": 4475, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.4445945945945946, "success_rate.epoch.env.math": 0.8977635782747604, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.7545034642032332, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4901633976881233, "success_rate.epoch.global": 0.6949177877428998, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9871244635193133, "tokens_p.mean_in_band": 0.784912109375, "tokens_rate.above_band": 0.966804979253112, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03319502074688797 }, { "epoch": 0.7446808510638298, "grad_norm": 101.78363343055061, "learning_rate": 1.8854816628927244e-07, "loss": 0.6266, "step": 4480, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4551282051282051, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.44534412955465585, "success_rate.epoch.env.math": 0.8977635782747604, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.7546125461254612, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49056102666854956, "success_rate.epoch.global": 0.6951328754852195, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958448753462604, "tokens_p.mean_in_band": 0.6267027243589743, "tokens_rate.above_band": 0.9025, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0975 }, { "epoch": 0.7455119680851063, "grad_norm": 270.3873463813209, "learning_rate": 1.885240709815789e-07, "loss": 0.4389, "step": 4485, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.44609164420485176, "success_rate.epoch.env.math": 0.8978723404255319, "success_rate.epoch.env.sat": 0.0975609756097561, "success_rate.epoch.env.science": 0.7547256800368833, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4908709683060782, "success_rate.epoch.global": 0.6950492096629883, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958530805687204, "tokens_p.mean_in_band": 0.5828208111702128, "tokens_rate.above_band": 0.8997867803837953, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10021321961620469 }, { "epoch": 0.746343085106383, "grad_norm": 92.75956210863058, "learning_rate": 1.8849996054574774e-07, "loss": 0.5714, "step": 4490, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.44609164420485176, "success_rate.epoch.env.math": 0.89798087141339, "success_rate.epoch.env.sat": 0.0975609756097561, "success_rate.epoch.env.science": 0.7549516351911562, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4911835773063001, "success_rate.epoch.global": 0.695321811680572, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9908333333333333, "tokens_p.mean_in_band": 0.6580584490740741, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 0.7471742021276596, "grad_norm": 87.23461190489874, "learning_rate": 1.8847583502456897e-07, "loss": 0.443, "step": 4495, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.44609164420485176, "success_rate.epoch.env.math": 0.8980891719745223, "success_rate.epoch.env.sat": 0.0975609756097561, "success_rate.epoch.env.science": 0.7552334943639292, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4912190463730188, "success_rate.epoch.global": 0.6955939267639178, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957496136012365, "tokens_p.mean_in_band": 0.71484375, "tokens_rate.above_band": 0.9528718703976435, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047128129602356406 }, { "epoch": 0.7480053191489362, "grad_norm": 95.23486458877622, "learning_rate": 1.884516944608595e-07, "loss": 0.4724, "step": 4500, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.44609164420485176, "success_rate.epoch.env.math": 0.8980891719745223, "success_rate.epoch.env.sat": 0.0975609756097561, "success_rate.epoch.env.science": 0.7555147058823529, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49124461105651185, "success_rate.epoch.global": 0.6958203183102781, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9838963963963964, "tokens_p.mean_below_band": 6.07222318649292e-07, "tokens_p.mean_in_band": 0.7063238188976378, "tokens_rate.above_band": 0.8125915080527086, "tokens_rate.below_band": 0.0014641288433382138, "tokens_rate.in_band": 0.18594436310395315 }, { "epoch": 0.7488364361702128, "grad_norm": 87.79097437163627, "learning_rate": 1.8842753889746295e-07, "loss": 0.3709, "step": 4505, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.4468371467025572, "success_rate.epoch.env.math": 0.8980891719745223, "success_rate.epoch.env.sat": 0.0975609756097561, "success_rate.epoch.env.science": 0.7556270096463023, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.491322593443935, "success_rate.epoch.global": 0.6959559916741005, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9909336419753086, "tokens_p.mean_in_band": 0.7455610795454546, "tokens_rate.above_band": 0.8804347826086957, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11956521739130435 }, { "epoch": 0.7496675531914894, "grad_norm": 60.275132336223486, "learning_rate": 1.884033683772495e-07, "loss": 0.4219, "step": 4510, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.4468371467025572, "success_rate.epoch.env.math": 0.8981972428419936, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.7558513079394218, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49192055907343835, "success_rate.epoch.global": 0.6962720926778554, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9873481781376519, "tokens_p.mean_below_band": 1.0477378964424133e-08, "tokens_p.mean_in_band": 0.759046052631579, "tokens_rate.above_band": 0.9250936329588015, "tokens_rate.below_band": 0.003745318352059925, "tokens_rate.in_band": 0.07116104868913857 }, { "epoch": 0.750498670212766, "grad_norm": 129.80182577304845, "learning_rate": 1.883791829431159e-07, "loss": 0.4917, "step": 4515, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.4469798657718121, "success_rate.epoch.env.math": 0.8981972428419936, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.756131102452441, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4919589693990996, "success_rate.epoch.global": 0.6964391691394659, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963735673352435, "tokens_p.mean_in_band": 0.5546875, "tokens_rate.above_band": 0.9432432432432433, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05675675675675676 }, { "epoch": 0.7513297872340425, "grad_norm": 115.17396105034123, "learning_rate": 1.8835498263798544e-07, "loss": 0.3525, "step": 4520, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.44638069705093836, "success_rate.epoch.env.math": 0.8981972428419936, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.7561813186813187, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4919090646271, "success_rate.epoch.global": 0.6964126890008894, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9970112517580872, "tokens_p.mean_below_band": 1.2014061212539673e-07, "tokens_p.mean_in_band": 0.5718961148648649, "tokens_rate.above_band": 0.9492656875834445, "tokens_rate.below_band": 0.0013351134846461949, "tokens_rate.in_band": 0.049399198931909215 }, { "epoch": 0.7521609042553191, "grad_norm": 97.3441646839279, "learning_rate": 1.8833076750480762e-07, "loss": 0.5321, "step": 4525, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.4457831325301205, "success_rate.epoch.env.math": 0.8983050847457628, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.7564043915827996, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4918848237438666, "success_rate.epoch.global": 0.6965343601895735, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.992814465408805, "tokens_p.mean_in_band": 0.5687087414821125, "tokens_rate.above_band": 0.8713283647523016, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12867163524769837 }, { "epoch": 0.7529920212765957, "grad_norm": 82.49339131704869, "learning_rate": 1.8830653758655847e-07, "loss": 0.3581, "step": 4530, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.4457831325301205, "success_rate.epoch.env.math": 0.8984126984126984, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.7565157750342936, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4919047325728147, "success_rate.epoch.global": 0.6966691339748334, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987344236760125, "tokens_p.mean_in_band": 0.7041377314814815, "tokens_rate.above_band": 0.9596412556053812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04035874439461883 }, { "epoch": 0.7538231382978723, "grad_norm": 81.1211690797154, "learning_rate": 1.882822929262401e-07, "loss": 0.3259, "step": 4535, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4585987261146497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.4457831325301205, "success_rate.epoch.env.math": 0.8986272439281943, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.7566826593557231, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4919394080125352, "success_rate.epoch.global": 0.6968934911242604, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9855769230769231, "tokens_p.mean_in_band": 0.8697916666666666, "tokens_rate.above_band": 0.9774436090225563, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022556390977443608 }, { "epoch": 0.754654255319149, "grad_norm": 98.63975221861453, "learning_rate": 1.8825803356688083e-07, "loss": 0.3687, "step": 4540, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4620253164556962, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.4457831325301205, "success_rate.epoch.env.math": 0.8987341772151899, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7569048162519972, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49224829237732454, "success_rate.epoch.global": 0.6970592581646224, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944571865443425, "tokens_p.mean_in_band": 0.6529017857142857, "tokens_rate.above_band": 0.9211267605633803, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07887323943661972 }, { "epoch": 0.7554853723404256, "grad_norm": 93.80341475181635, "learning_rate": 1.8823375955153514e-07, "loss": 0.3712, "step": 4545, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4620253164556962, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.446524064171123, "success_rate.epoch.env.math": 0.8987341772151899, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7569048162519972, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4923156497992339, "success_rate.epoch.global": 0.6971040189125296, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930240471869328, "tokens_p.mean_in_band": 0.7568873355263158, "tokens_rate.above_band": 0.9354838709677419, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06451612903225806 }, { "epoch": 0.7563164893617021, "grad_norm": 1080.2769110308175, "learning_rate": 1.8820947092328338e-07, "loss": 0.5171, "step": 4550, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4620253164556962, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.446524064171123, "success_rate.epoch.env.math": 0.8989473684210526, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.757015742642026, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49234511503522405, "success_rate.epoch.global": 0.6972829297105729, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962899543378996, "tokens_p.mean_in_band": 0.7503004807692307, "tokens_rate.above_band": 0.9805970149253731, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019402985074626865 }, { "epoch": 0.7571476063829787, "grad_norm": 77.03679756876943, "learning_rate": 1.88185167725232e-07, "loss": 0.7005, "step": 4555, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4620253164556962, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.448, "success_rate.epoch.env.math": 0.8991596638655462, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7570711678832117, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49250362926382, "success_rate.epoch.global": 0.6975062712114505, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941722972972973, "tokens_p.mean_in_band": 0.7371651785714286, "tokens_rate.above_band": 0.9296482412060302, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07035175879396985 }, { "epoch": 0.7579787234042553, "grad_norm": 87.16888046312441, "learning_rate": 1.8816085000051317e-07, "loss": 0.3796, "step": 4560, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4620253164556962, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.448, "success_rate.epoch.env.math": 0.8991596638655462, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.7573479152426521, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4925287881146783, "success_rate.epoch.global": 0.697729283397228, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9963983050847458, "tokens_p.mean_in_band": 0.7682291666666666, "tokens_rate.above_band": 0.9800664451827242, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019933554817275746 }, { "epoch": 0.7588098404255319, "grad_norm": 95.60483840089177, "learning_rate": 1.881365177922849e-07, "loss": 0.4044, "step": 4565, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.46540880503144655, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4909090909090909, "success_rate.epoch.env.logic": 0.448, "success_rate.epoch.env.math": 0.8991596638655462, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.757568859549283, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4928564638312584, "success_rate.epoch.global": 0.6979519669957271, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950342465753425, "tokens_p.mean_in_band": 0.7084517045454546, "tokens_rate.above_band": 0.9707446808510638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02925531914893617 }, { "epoch": 0.7596409574468085, "grad_norm": 136.58477556778897, "learning_rate": 1.8811217114373093e-07, "loss": 0.4351, "step": 4570, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.46540880503144655, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4879518072289157, "success_rate.epoch.env.logic": 0.4474034620505992, "success_rate.epoch.env.math": 0.8991596638655462, "success_rate.epoch.env.sat": 0.10309278350515463, "success_rate.epoch.env.science": 0.7575068243858053, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4924954320903522, "success_rate.epoch.global": 0.6976299131458855, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.35, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9937592749629002, "tokens_p.mean_below_band": 4.274625098332763e-11, "tokens_p.mean_in_band": 0.5373263888888888, "tokens_rate.above_band": 0.9159223300970873, "tokens_rate.below_band": 0.0001941747572815534, "tokens_rate.in_band": 0.08388349514563107 }, { "epoch": 0.7604720744680851, "grad_norm": 69.68600503184477, "learning_rate": 1.8808781009806054e-07, "loss": 0.4638, "step": 4575, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4879518072289157, "success_rate.epoch.env.logic": 0.44680851063829785, "success_rate.epoch.env.math": 0.89937106918239, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.7575619740732318, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49213716786795775, "success_rate.epoch.global": 0.6973529411764706, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9909735654416505, "tokens_p.mean_in_band": 0.5750465645695364, "tokens_rate.above_band": 0.8370210469508904, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16297895304910956 }, { "epoch": 0.7613031914893617, "grad_norm": 154.82241261414472, "learning_rate": 1.8806343469850865e-07, "loss": 0.4098, "step": 4580, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4879518072289157, "success_rate.epoch.env.logic": 0.44680851063829785, "success_rate.epoch.env.math": 0.8994764397905759, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.7578373466606089, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49217178088573627, "success_rate.epoch.global": 0.6976197472818102, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9900210084033614, "tokens_p.mean_in_band": 0.7777777777777778, "tokens_rate.above_band": 0.9296875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0703125 }, { "epoch": 0.7621343085106383, "grad_norm": 59.196976003857145, "learning_rate": 1.8803904498833565e-07, "loss": 0.6436, "step": 4585, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4879518072289157, "success_rate.epoch.env.logic": 0.44754316069057104, "success_rate.epoch.env.math": 0.8994764397905759, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.758057194734453, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49225855344265596, "success_rate.epoch.global": 0.697841726618705, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994951923076923, "tokens_p.mean_in_band": 0.6676136363636364, "tokens_rate.above_band": 0.9219858156028369, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07801418439716312 }, { "epoch": 0.7629654255319149, "grad_norm": 183.89250930950763, "learning_rate": 1.8801464101082725e-07, "loss": 0.4558, "step": 4590, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4879518072289157, "success_rate.epoch.env.logic": 0.44754316069057104, "success_rate.epoch.env.math": 0.8987473903966597, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.758057194734453, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4921922762250272, "success_rate.epoch.global": 0.6978280011740534, "success_rate.window.env.math": 0.8, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9882352941176471, "tokens_p.mean_in_band": 0.579358552631579, "tokens_rate.above_band": 0.8173076923076923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18269230769230768 }, { "epoch": 0.7637965425531915, "grad_norm": 90.28466282422191, "learning_rate": 1.8799022280929457e-07, "loss": 0.5128, "step": 4595, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4879518072289157, "success_rate.epoch.env.logic": 0.4482758620689655, "success_rate.epoch.env.math": 0.8989583333333333, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.7582766439909298, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4922980120042586, "success_rate.epoch.global": 0.6981381029174608, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.988031914893617, "tokens_p.mean_in_band": 0.8483072916666666, "tokens_rate.above_band": 0.9690721649484536, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030927835051546393 }, { "epoch": 0.7646276595744681, "grad_norm": 57.17971859733982, "learning_rate": 1.879657904270739e-07, "loss": 0.4732, "step": 4600, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48502994011976047, "success_rate.epoch.env.logic": 0.44900662251655626, "success_rate.epoch.env.math": 0.899063475546306, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.758440969861772, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49240087198559546, "success_rate.epoch.global": 0.6983011130638547, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9913245178258329, "tokens_p.mean_below_band": 5.885958671569824e-07, "tokens_p.mean_in_band": 0.5678986025528169, "tokens_rate.above_band": 0.8572144288577155, "tokens_rate.below_band": 0.000501002004008016, "tokens_rate.in_band": 0.14228456913827656 }, { "epoch": 0.7654587765957447, "grad_norm": 61.728200587299696, "learning_rate": 1.8794134390752677e-07, "loss": 0.2586, "step": 4605, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48502994011976047, "success_rate.epoch.env.logic": 0.44900662251655626, "success_rate.epoch.env.math": 0.899063475546306, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7586050724637681, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4924157904039587, "success_rate.epoch.global": 0.6984336114770897, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9893973214285714, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 0.7662898936170213, "grad_norm": 126.1425396514624, "learning_rate": 1.8791688329403973e-07, "loss": 0.405, "step": 4610, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48502994011976047, "success_rate.epoch.env.logic": 0.44900662251655626, "success_rate.epoch.env.math": 0.899063475546306, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7586518887129609, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49242004642661263, "success_rate.epoch.global": 0.6985078993563487, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.16666666666666669, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9936143761301989, "tokens_p.mean_in_band": 0.65159375, "tokens_rate.above_band": 0.8984565393988627, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10154346060113728 }, { "epoch": 0.7671210106382979, "grad_norm": 103.05027562454406, "learning_rate": 1.8789240863002443e-07, "loss": 0.394, "step": 4615, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.44900662251655626, "success_rate.epoch.env.math": 0.8991683991683992, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7585895117540687, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4927025777574016, "success_rate.epoch.global": 0.6985820786434732, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.989029255319149, "tokens_p.mean_in_band": 0.7979403409090909, "tokens_rate.above_band": 0.962457337883959, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03754266211604096 }, { "epoch": 0.7679521276595744, "grad_norm": 235.67823000533363, "learning_rate": 1.8786791995891731e-07, "loss": 0.5307, "step": 4620, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.44900662251655626, "success_rate.epoch.env.math": 0.8992731048805815, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7585817524841915, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49271139107033846, "success_rate.epoch.global": 0.698656149576395, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9877717391304348, "tokens_p.mean_below_band": 3.213062882423401e-08, "tokens_p.mean_in_band": 0.8000710227272727, "tokens_rate.above_band": 0.9387755102040817, "tokens_rate.below_band": 0.00510204081632653, "tokens_rate.in_band": 0.05612244897959184 }, { "epoch": 0.768783244680851, "grad_norm": 119.01019874978113, "learning_rate": 1.8784341732417986e-07, "loss": 0.2938, "step": 4625, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4497354497354497, "success_rate.epoch.env.math": 0.8995859213250518, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7587996389891697, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4928258938129149, "success_rate.epoch.global": 0.6990078786110301, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949596774193549, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.9959839357429718, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004016064257028112 }, { "epoch": 0.7696143617021277, "grad_norm": 63.59816524060304, "learning_rate": 1.8781890076929817e-07, "loss": 0.4444, "step": 4630, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4497354497354497, "success_rate.epoch.env.math": 0.8995859213250518, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7589004055881028, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49283505441281794, "success_rate.epoch.global": 0.6991253644314869, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970734126984127, "tokens_p.mean_in_band": 0.7033854166666667, "tokens_rate.above_band": 0.9545454545454546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045454545454545456 }, { "epoch": 0.7704454787234043, "grad_norm": 87.13401439465125, "learning_rate": 1.877943703377832e-07, "loss": 0.3747, "step": 4635, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.449868073878628, "success_rate.epoch.env.math": 0.8996897621509824, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.759009009009009, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49286642426645566, "success_rate.epoch.global": 0.6991988346686089, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957425068119891, "tokens_p.mean_in_band": 0.6631804435483871, "tokens_rate.above_band": 0.9594771241830066, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040522875816993466 }, { "epoch": 0.7712765957446809, "grad_norm": 125.8055342406205, "learning_rate": 1.8776982607317038e-07, "loss": 0.4595, "step": 4640, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.449868073878628, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7590632740373789, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49289956089167275, "success_rate.epoch.global": 0.6993739991265104, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970238095238095, "tokens_p.mean_in_band": 0.4344429347826087, "tokens_rate.above_band": 0.9319526627218935, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06804733727810651 }, { "epoch": 0.7721077127659575, "grad_norm": 111.69254458982232, "learning_rate": 1.8774526801901982e-07, "loss": 0.5783, "step": 4645, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4492753623188406, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7592800899887514, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49286538856363493, "success_rate.epoch.global": 0.6994471923188827, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.4375, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982127383015598, "tokens_p.mean_in_band": 0.5079385080645161, "tokens_rate.above_band": 0.9490131578947368, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05098684210526316 }, { "epoch": 0.772938829787234, "grad_norm": 278.6715397925128, "learning_rate": 1.87720696218916e-07, "loss": 0.3832, "step": 4650, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4486842105263158, "success_rate.epoch.env.math": 0.8990731204943357, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7591093117408907, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.492711860423085, "success_rate.epoch.global": 0.6991420677621055, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9966346153846154, "tokens_p.mean_below_band": 6.007030606269836e-08, "tokens_p.mean_in_band": 0.5984375, "tokens_rate.above_band": 0.9544787077826725, "tokens_rate.below_band": 0.0014684287812041115, "tokens_rate.in_band": 0.04405286343612335 }, { "epoch": 0.7737699468085106, "grad_norm": 89.42207069568659, "learning_rate": 1.8769611071646794e-07, "loss": 0.4132, "step": 4655, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4486842105263158, "success_rate.epoch.env.math": 0.8991769547325102, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.7593799146259268, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4927459001615588, "success_rate.epoch.global": 0.6994043295074822, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9847383720930233, "tokens_p.mean_in_band": 0.853515625, "tokens_rate.above_band": 0.9699248120300752, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03007518796992481 }, { "epoch": 0.7746010638297872, "grad_norm": 98.70776367422869, "learning_rate": 1.8767151155530878e-07, "loss": 0.5693, "step": 4660, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4486842105263158, "success_rate.epoch.env.math": 0.8992805755395683, "success_rate.epoch.env.sat": 0.10508474576271186, "success_rate.epoch.env.science": 0.7593714927048261, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49272206086864245, "success_rate.epoch.global": 0.6993758165190884, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.3, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9972564017292983, "tokens_p.mean_in_band": 0.6680445995145631, "tokens_rate.above_band": 0.93588546529723, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06411453470277 }, { "epoch": 0.7754321808510638, "grad_norm": 158.28623187938004, "learning_rate": 1.8764689877909605e-07, "loss": 0.4424, "step": 4665, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.45013123359580054, "success_rate.epoch.env.math": 0.8993839835728953, "success_rate.epoch.env.sat": 0.10508474576271186, "success_rate.epoch.env.science": 0.7593630858936982, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4928622448951592, "success_rate.epoch.global": 0.6995359628770301, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9898255813953488, "tokens_p.mean_below_band": 1.8189894035458565e-09, "tokens_p.mean_in_band": 0.7298583984375, "tokens_rate.above_band": 0.9662921348314607, "tokens_rate.below_band": 0.003745318352059925, "tokens_rate.in_band": 0.0299625468164794 }, { "epoch": 0.7762632978723404, "grad_norm": 112.02966213611131, "learning_rate": 1.876222724315114e-07, "loss": 0.4689, "step": 4670, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4508519003931848, "success_rate.epoch.env.math": 0.8993839835728953, "success_rate.epoch.env.sat": 0.10508474576271186, "success_rate.epoch.env.science": 0.759300761990139, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4929220942491433, "success_rate.epoch.global": 0.6995652173913044, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.994526836158192, "tokens_p.mean_in_band": 0.5738407258064516, "tokens_rate.above_band": 0.9580514208389715, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04194857916102842 }, { "epoch": 0.777094414893617, "grad_norm": 87.2594615700588, "learning_rate": 1.8759763255626054e-07, "loss": 0.4573, "step": 4675, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4508519003931848, "success_rate.epoch.env.math": 0.8997955010224948, "success_rate.epoch.env.sat": 0.10508474576271186, "success_rate.epoch.env.science": 0.7594624860022396, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4929742071092978, "success_rate.epoch.global": 0.6998696974084262, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.999664810231023, "tokens_p.mean_in_band": 0.5460611979166666, "tokens_rate.above_band": 0.9619047619047619, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0380952380952381 }, { "epoch": 0.7779255319148937, "grad_norm": 98.47884497130033, "learning_rate": 1.875729791970732e-07, "loss": 0.6578, "step": 4680, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.45157068062827227, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.10508474576271186, "success_rate.epoch.env.science": 0.7596239928379588, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49307282402278063, "success_rate.epoch.global": 0.7001301894980472, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9997102009273571, "tokens_p.mean_in_band": 0.5305266203703703, "tokens_rate.above_band": 0.9599406528189911, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040059347181008904 }, { "epoch": 0.7787566489361702, "grad_norm": 79.4876342910686, "learning_rate": 1.8754831239770312e-07, "loss": 0.4461, "step": 4685, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.45157068062827227, "success_rate.epoch.env.math": 0.9, "success_rate.epoch.env.sat": 0.10472972972972973, "success_rate.epoch.env.science": 0.7598926894701542, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4930649768045274, "success_rate.epoch.global": 0.7002457002457002, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4523809523809524, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9959072305593452, "tokens_p.mean_in_band": 0.640869140625, "tokens_rate.above_band": 0.93854033290653, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06145966709346991 }, { "epoch": 0.7795877659574468, "grad_norm": 73.45925761774734, "learning_rate": 1.875236322019278e-07, "loss": 0.5456, "step": 4690, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4516971279373368, "success_rate.epoch.env.math": 0.90020366598778, "success_rate.epoch.env.sat": 0.10472972972972973, "success_rate.epoch.env.science": 0.7598926894701542, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4930949871042405, "success_rate.epoch.global": 0.700274447493861, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.575, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9897186147186147, "tokens_p.mean_in_band": 0.4779641544117647, "tokens_rate.above_band": 0.7310126582278481, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2689873417721519 }, { "epoch": 0.7804188829787234, "grad_norm": 109.79854067353953, "learning_rate": 1.874989386535485e-07, "loss": 0.4015, "step": 4695, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4516971279373368, "success_rate.epoch.env.math": 0.900709219858156, "success_rate.epoch.env.sat": 0.10472972972972973, "success_rate.epoch.env.science": 0.760053619302949, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4931555765318015, "success_rate.epoch.global": 0.7006204010965229, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.988108407079646, "tokens_p.mean_below_band": 1.126900315284729e-07, "tokens_p.mean_in_band": 0.84140625, "tokens_rate.above_band": 0.9495798319327731, "tokens_rate.below_band": 0.008403361344537815, "tokens_rate.in_band": 0.04201680672268908 }, { "epoch": 0.78125, "grad_norm": 109.7366112421173, "learning_rate": 1.8747423179639027e-07, "loss": 0.3603, "step": 4700, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.4516971279373368, "success_rate.epoch.env.math": 0.9008097165991903, "success_rate.epoch.env.sat": 0.10472972972972973, "success_rate.epoch.env.science": 0.7603748326639893, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49319391381380834, "success_rate.epoch.global": 0.7009224560392043, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9846854304635762, "tokens_p.mean_in_band": 0.80625, "tokens_rate.above_band": 0.967948717948718, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03205128205128205 }, { "epoch": 0.7820811170212766, "grad_norm": 56.87436390428632, "learning_rate": 1.874495116743017e-07, "loss": 0.6172, "step": 4705, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4880952380952381, "success_rate.epoch.env.logic": 0.45241199478487615, "success_rate.epoch.env.math": 0.901010101010101, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.7604282846308276, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49324992089314124, "success_rate.epoch.global": 0.700993806711796, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9935661764705882, "tokens_p.mean_below_band": 4.5821070671081543e-07, "tokens_p.mean_in_band": 0.16626420454545454, "tokens_rate.above_band": 0.38095238095238093, "tokens_rate.below_band": 0.0028011204481792717, "tokens_rate.in_band": 0.6162464985994398 }, { "epoch": 0.7829122340425532, "grad_norm": 64.95903033996777, "learning_rate": 1.8742477833115493e-07, "loss": 0.5067, "step": 4710, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4911242603550296, "success_rate.epoch.env.logic": 0.45241199478487615, "success_rate.epoch.env.math": 0.901010101010101, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.7606418542455984, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49354470197264694, "success_rate.epoch.global": 0.7012089810017271, "success_rate.window.env.ded": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.988080039964476, "tokens_p.mean_in_band": 0.5522765456989247, "tokens_rate.above_band": 0.8288553551711446, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17114464482885536 }, { "epoch": 0.7837433510638298, "grad_norm": 78.81993271704219, "learning_rate": 1.8740003181084564e-07, "loss": 0.2517, "step": 4715, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49411764705882355, "success_rate.epoch.env.logic": 0.45241199478487615, "success_rate.epoch.env.math": 0.901010101010101, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.7608550434201736, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49383620887068047, "success_rate.epoch.global": 0.7014238458219474, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995873786407767, "tokens_p.mean_in_band": 0.7572544642857143, "tokens_rate.above_band": 0.9735349716446124, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026465028355387523 }, { "epoch": 0.7845744680851063, "grad_norm": 94.4205246120049, "learning_rate": 1.873752721572928e-07, "loss": 0.4092, "step": 4720, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49411764705882355, "success_rate.epoch.env.logic": 0.45241199478487615, "success_rate.epoch.env.math": 0.9011099899091827, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.7610146862483311, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4938598026640659, "success_rate.epoch.global": 0.7015955153083225, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.990296803652968, "tokens_p.mean_in_band": 0.720703125, "tokens_rate.above_band": 0.9647577092511013, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03524229074889868 }, { "epoch": 0.785405585106383, "grad_norm": 270.09233023408433, "learning_rate": 1.8735049941443872e-07, "loss": 0.5048, "step": 4725, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49411764705882355, "success_rate.epoch.env.logic": 0.45241199478487615, "success_rate.epoch.env.math": 0.9013091641490433, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.7611209964412812, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4938875739761395, "success_rate.epoch.global": 0.7017669875017957, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9905555555555555, "tokens_p.mean_in_band": 0.6970052083333333, "tokens_rate.above_band": 0.8823529411764706, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11764705882352941 }, { "epoch": 0.7862367021276596, "grad_norm": 54.12579572871874, "learning_rate": 1.87325713626249e-07, "loss": 0.4699, "step": 4730, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49411764705882355, "success_rate.epoch.env.logic": 0.45241199478487615, "success_rate.epoch.env.math": 0.9015075376884422, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.7613333333333333, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49392491128808963, "success_rate.epoch.global": 0.7020238266111669, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9881993006993007, "tokens_p.mean_in_band": 0.66552734375, "tokens_rate.above_band": 0.9470198675496688, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.052980132450331126 }, { "epoch": 0.7870678191489362, "grad_norm": 65.33434926989125, "learning_rate": 1.873009148367123e-07, "loss": 0.3686, "step": 4735, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49707602339181284, "success_rate.epoch.env.logic": 0.45241199478487615, "success_rate.epoch.env.math": 0.9015075376884422, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.7615452930728241, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4939519717667284, "success_rate.epoch.global": 0.7021368134232038, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968112244897959, "tokens_p.mean_in_band": 0.7623697916666666, "tokens_rate.above_band": 0.9961880559085133, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0038119440914866584 }, { "epoch": 0.7878989361702128, "grad_norm": 127.47005455921338, "learning_rate": 1.8727610308984044e-07, "loss": 0.502, "step": 4740, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45241199478487615, "success_rate.epoch.env.math": 0.9017051153460381, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7614821388950521, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4941981664347731, "success_rate.epoch.global": 0.7021489971346705, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9927998812351544, "tokens_p.mean_in_band": 0.611686862244898, "tokens_rate.above_band": 0.8957446808510638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10425531914893617 }, { "epoch": 0.7887300531914894, "grad_norm": 76.46894568721432, "learning_rate": 1.8725127842966812e-07, "loss": 0.3691, "step": 4745, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45241199478487615, "success_rate.epoch.env.math": 0.9018036072144289, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7616936377743294, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49422634741183374, "success_rate.epoch.global": 0.7023622047244095, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9893830128205128, "tokens_p.mean_in_band": 0.5825639204545454, "tokens_rate.above_band": 0.8764044943820225, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12359550561797752 }, { "epoch": 0.789561170212766, "grad_norm": 46.0606289653229, "learning_rate": 1.8722644090025308e-07, "loss": 0.4443, "step": 4750, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.453125, "success_rate.epoch.env.math": 0.9018036072144289, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7616832779623477, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4942902242666649, "success_rate.epoch.global": 0.7024320457796852, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9892293689320388, "tokens_p.mean_below_band": 1.6555645743210334e-12, "tokens_p.mean_in_band": 0.7705965909090909, "tokens_rate.above_band": 0.8995633187772926, "tokens_rate.below_band": 0.004366812227074236, "tokens_rate.in_band": 0.09606986899563319 }, { "epoch": 0.7903922872340425, "grad_norm": 44.382224407806696, "learning_rate": 1.8720159054567584e-07, "loss": 0.3246, "step": 4755, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4538361508452536, "success_rate.epoch.env.math": 0.9019019019019019, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7617887978746956, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49437340294348986, "success_rate.epoch.global": 0.7026022304832714, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9885737885462555, "tokens_p.mean_in_band": 0.7067792338709677, "tokens_rate.above_band": 0.8798449612403101, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12015503875968993 }, { "epoch": 0.7912234042553191, "grad_norm": 481.43337915230035, "learning_rate": 1.871767274100397e-07, "loss": 0.5468, "step": 4760, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45454545454545453, "success_rate.epoch.env.math": 0.9019019019019019, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7617783676177837, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4944369368928798, "success_rate.epoch.global": 0.7026718102586084, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5666666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.995365466101695, "tokens_p.mean_in_band": 0.653671875, "tokens_rate.above_band": 0.9496981891348089, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05030181086519115 }, { "epoch": 0.7920545212765957, "grad_norm": 448.1947975683327, "learning_rate": 1.8715185153747062e-07, "loss": 0.4941, "step": 4765, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45454545454545453, "success_rate.epoch.env.math": 0.902, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7618732052131655, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49445447650137786, "success_rate.epoch.global": 0.7028261490151299, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994050875273523, "tokens_p.mean_in_band": 0.658203125, "tokens_rate.above_band": 0.9744136460554371, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0255863539445629 }, { "epoch": 0.7928856382978723, "grad_norm": 80.66313638530086, "learning_rate": 1.871269629721172e-07, "loss": 0.5221, "step": 4770, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45454545454545453, "success_rate.epoch.env.math": 0.902, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7620309050772627, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49446881285265953, "success_rate.epoch.global": 0.7029533456983877, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9901620370370371, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9432314410480349, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056768558951965066 }, { "epoch": 0.793716755319149, "grad_norm": 123.52223254235444, "learning_rate": 1.8710206175815048e-07, "loss": 0.501, "step": 4775, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45395590142671854, "success_rate.epoch.env.math": 0.9020979020979021, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7622932745314223, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49444796907387095, "success_rate.epoch.global": 0.7031071835803877, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9928192800788954, "tokens_p.mean_in_band": 0.6898777173913043, "tokens_rate.above_band": 0.9168173598553345, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08318264014466546 }, { "epoch": 0.7945478723404256, "grad_norm": 98.88084300513229, "learning_rate": 1.870771479397641e-07, "loss": 0.4787, "step": 4780, "success_rate.epoch.env.abd": 0.45454545454545453, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4533678756476684, "success_rate.epoch.env.math": 0.9022931206380858, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7622824410663142, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4944112744644186, "success_rate.epoch.global": 0.7031183255019222, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988938053097345, "tokens_p.mean_in_band": 0.6005208333333333, "tokens_rate.above_band": 0.9576271186440678, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0423728813559322 }, { "epoch": 0.7953789893617021, "grad_norm": 137.35959469548166, "learning_rate": 1.8705222156117395e-07, "loss": 0.4876, "step": 4785, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4533678756476684, "success_rate.epoch.env.math": 0.9025844930417495, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7624394539850287, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49353376312090674, "success_rate.epoch.global": 0.7032716927453769, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9884956173780488, "tokens_p.mean_below_band": 1.7229467630386353e-08, "tokens_p.mean_in_band": 0.17882948833459789, "tokens_rate.above_band": 0.33223600911623197, "tokens_rate.below_band": 0.0002532286654849329, "tokens_rate.in_band": 0.6675107622182831 }, { "epoch": 0.7962101063829787, "grad_norm": 55.68244950472272, "learning_rate": 1.8702728266661825e-07, "loss": 0.4169, "step": 4790, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45348837209302323, "success_rate.epoch.env.math": 0.9026812313803376, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.762544014084507, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4935630172012177, "success_rate.epoch.global": 0.7033404406538735, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978911042944786, "tokens_p.mean_in_band": 0.5463709677419355, "tokens_rate.above_band": 0.9633569739952719, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03664302600472813 }, { "epoch": 0.7970412234042553, "grad_norm": 107.77214994914767, "learning_rate": 1.8700233130035744e-07, "loss": 0.4674, "step": 4795, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4567901234567901, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45348837209302323, "success_rate.epoch.env.math": 0.9028741328047571, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7627006817681988, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49333686841688634, "success_rate.epoch.global": 0.7034512143161483, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993421052631579, "tokens_p.mean_in_band": 0.794677734375, "tokens_rate.above_band": 0.9726962457337884, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027303754266211604 }, { "epoch": 0.7978723404255319, "grad_norm": 106.40908438699832, "learning_rate": 1.8697736750667404e-07, "loss": 0.5547, "step": 4800, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4567901234567901, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45348837209302323, "success_rate.epoch.env.math": 0.902970297029703, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7626373626373626, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49333985433453265, "success_rate.epoch.global": 0.7034776437189496, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.575, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9938345546786922, "tokens_p.mean_below_band": 5.995204332975845e-15, "tokens_p.mean_in_band": 0.543395011655746, "tokens_rate.above_band": 0.877132262051916, "tokens_rate.below_band": 0.00024721878862793575, "tokens_rate.in_band": 0.12262051915945611 }, { "epoch": 0.7987034574468085, "grad_norm": 94.4949934902306, "learning_rate": 1.8695239132987267e-07, "loss": 0.3545, "step": 4805, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4567901234567901, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.26666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45348837209302323, "success_rate.epoch.env.math": 0.9030662710187932, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7627937623544916, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4933627973987344, "success_rate.epoch.global": 0.7036459072208824, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9889937106918238, "tokens_p.mean_in_band": 0.6333333333333333, "tokens_rate.above_band": 0.9137931034482759, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08620689655172414 }, { "epoch": 0.7995345744680851, "grad_norm": 103.10973473342527, "learning_rate": 1.8692740281427998e-07, "loss": 0.4871, "step": 4810, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4567901234567901, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4529032258064516, "success_rate.epoch.env.math": 0.9031620553359684, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7629499561018437, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4974991760452152, "success_rate.epoch.global": 0.703756201275691, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5599999999999999, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9942062248096731, "tokens_p.mean_below_band": 2.1736923372372985e-10, "tokens_p.mean_in_band": 0.5403772249190939, "tokens_rate.above_band": 0.8780967361384192, "tokens_rate.below_band": 0.00039323633503735744, "tokens_rate.in_band": 0.12151002752654345 }, { "epoch": 0.8003656914893617, "grad_norm": 79.63995874347275, "learning_rate": 1.8690240200424442e-07, "loss": 0.5516, "step": 4815, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4539877300613497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028901734104047, "success_rate.epoch.env.logic": 0.4529032258064516, "success_rate.epoch.env.math": 0.903448275862069, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7629385964912281, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49753214340252877, "success_rate.epoch.global": 0.703850509626274, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942651098901099, "tokens_p.mean_in_band": 0.5072115384615384, "tokens_rate.above_band": 0.9859154929577465, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014084507042253521 }, { "epoch": 0.8011968085106383, "grad_norm": 74.04759828394826, "learning_rate": 1.8687738894413635e-07, "loss": 0.5614, "step": 4820, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4539877300613497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028901734104047, "success_rate.epoch.env.logic": 0.4529032258064516, "success_rate.epoch.env.math": 0.9036381514257621, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7629791894852136, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4975530950895905, "success_rate.epoch.global": 0.7040022627633998, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9886363636363636, "tokens_p.mean_in_band": 0.5535386029411765, "tokens_rate.above_band": 0.8859060402684564, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11409395973154363 }, { "epoch": 0.8020279255319149, "grad_norm": 343.6331133242387, "learning_rate": 1.8685236367834774e-07, "loss": 0.4205, "step": 4825, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4539877300613497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028901734104047, "success_rate.epoch.env.logic": 0.45314505776636715, "success_rate.epoch.env.math": 0.9037328094302554, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7630310994305738, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4975884041722967, "success_rate.epoch.global": 0.7039706090151194, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9956226745458525, "tokens_p.mean_in_band": 0.5469341856060606, "tokens_rate.above_band": 0.8964096527369041, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10359034726309593 }, { "epoch": 0.8028590425531915, "grad_norm": 69.80312069617709, "learning_rate": 1.868273262512924e-07, "loss": 0.3274, "step": 4830, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4539877300613497, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028901734104047, "success_rate.epoch.env.logic": 0.454661558109834, "success_rate.epoch.env.math": 0.9037328094302554, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7631348511383538, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4977356998133192, "success_rate.epoch.global": 0.7040801920090357, "success_rate.window.env.logic": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940548780487805, "tokens_p.mean_in_band": 0.7297952586206896, "tokens_rate.above_band": 0.9339407744874715, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06605922551252848 }, { "epoch": 0.8036901595744681, "grad_norm": 137.664539334712, "learning_rate": 1.8680227670740552e-07, "loss": 0.5603, "step": 4835, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028901734104047, "success_rate.epoch.env.logic": 0.454661558109834, "success_rate.epoch.env.math": 0.9038272816486752, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7632385120350109, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4975020557450327, "success_rate.epoch.global": 0.7041061097784677, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9936926605504587, "tokens_p.mean_in_band": 0.6197916666666666, "tokens_rate.above_band": 0.923728813559322, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07627118644067797 }, { "epoch": 0.8045212765957447, "grad_norm": 68.32100318860228, "learning_rate": 1.8677721509114402e-07, "loss": 0.5593, "step": 4840, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028901734104047, "success_rate.epoch.env.logic": 0.454661558109834, "success_rate.epoch.env.math": 0.903921568627451, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.7633938333697792, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49752474740990044, "success_rate.epoch.global": 0.7042730221407418, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9912790697674418, "tokens_p.mean_in_band": 0.6490162037037037, "tokens_rate.above_band": 0.864321608040201, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.135678391959799 }, { "epoch": 0.8053523936170213, "grad_norm": 82.43262796546212, "learning_rate": 1.8675214144698605e-07, "loss": 0.487, "step": 4845, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028901734104047, "success_rate.epoch.env.logic": 0.454661558109834, "success_rate.epoch.env.math": 0.9040156709108716, "success_rate.epoch.env.sat": 0.10367892976588629, "success_rate.epoch.env.science": 0.7637038654728107, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.497529858183969, "success_rate.epoch.global": 0.7044654176644598, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9903273809523809, "tokens_p.mean_in_band": 0.6241957720588235, "tokens_rate.above_band": 0.860655737704918, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13934426229508196 }, { "epoch": 0.8061835106382979, "grad_norm": 73.98335530994913, "learning_rate": 1.8672705581943123e-07, "loss": 0.4966, "step": 4850, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028901734104047, "success_rate.epoch.env.logic": 0.454661558109834, "success_rate.epoch.env.math": 0.9042033235581622, "success_rate.epoch.env.sat": 0.10367892976588629, "success_rate.epoch.env.science": 0.7638585770405937, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4975609822035212, "success_rate.epoch.global": 0.7046734234234234, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9939786038728186, "tokens_p.mean_below_band": 2.1047890186309814e-07, "tokens_p.mean_in_band": 0.4960646299254526, "tokens_rate.above_band": 0.8165137614678899, "tokens_rate.below_band": 0.00019519812609798947, "tokens_rate.in_band": 0.1832910404060121 }, { "epoch": 0.8070146276595744, "grad_norm": 165.12937411134914, "learning_rate": 1.8670195825300036e-07, "loss": 0.5245, "step": 4855, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028901734104047, "success_rate.epoch.env.logic": 0.454661558109834, "success_rate.epoch.env.math": 0.904390243902439, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.7637949836423119, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4978438061895916, "success_rate.epoch.global": 0.7047819971870605, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9910493827160494, "tokens_p.mean_in_band": 0.7693014705882353, "tokens_rate.above_band": 0.9225512528473804, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0774487471526196 }, { "epoch": 0.807845744680851, "grad_norm": 77.03434377430698, "learning_rate": 1.8667684879223553e-07, "loss": 0.3306, "step": 4860, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.45535714285714285, "success_rate.epoch.env.math": 0.904390243902439, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.7638464893153074, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.49764898046321887, "success_rate.epoch.global": 0.7047659215520877, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9882422654690619, "tokens_p.mean_in_band": 0.5029169386422977, "tokens_rate.above_band": 0.7234657039711191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.27653429602888085 }, { "epoch": 0.8086768617021277, "grad_norm": 104.41531889994998, "learning_rate": 1.8665172748169981e-07, "loss": 0.5173, "step": 4865, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3125, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4547770700636943, "success_rate.epoch.env.math": 0.904390243902439, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.7636799651188141, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.4975811080095878, "success_rate.epoch.global": 0.7045678144764582, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.27777777777777773, "success_rate.window.global": 0.4, "tokens_p.mean_above_band": 0.9997700515084621, "tokens_p.mean_below_band": 5.343463271856308e-08, "tokens_p.mean_in_band": 0.6004569575471698, "tokens_rate.above_band": 0.9611032531824611, "tokens_rate.below_band": 0.0014144271570014145, "tokens_rate.in_band": 0.03748231966053748 }, { "epoch": 0.8095079787234043, "grad_norm": 69.62539356359133, "learning_rate": 1.8662659436597742e-07, "loss": 0.4742, "step": 4870, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4547770700636943, "success_rate.epoch.env.math": 0.904390243902439, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.7638858636462644, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5012762966457731, "success_rate.epoch.global": 0.7047752808988764, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.995343137254902, "tokens_p.mean_in_band": 0.755078125, "tokens_rate.above_band": 0.9622641509433962, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03773584905660377 }, { "epoch": 0.8103390957446809, "grad_norm": 85.62866956559783, "learning_rate": 1.8660144948967348e-07, "loss": 0.4147, "step": 4875, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4547770700636943, "success_rate.epoch.env.math": 0.904390243902439, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.7641940395910376, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5013043126407525, "success_rate.epoch.global": 0.70502385630087, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945032573289903, "tokens_p.mean_in_band": 0.778125, "tokens_rate.above_band": 0.953416149068323, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046583850931677016 }, { "epoch": 0.8111702127659575, "grad_norm": 63.483552341582175, "learning_rate": 1.8657629289741398e-07, "loss": 0.4129, "step": 4880, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4547770700636943, "success_rate.epoch.env.math": 0.904390243902439, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.7644502390265102, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5013276034985228, "success_rate.epoch.global": 0.7052306829336699, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9889846743295019, "tokens_p.mean_in_band": 0.7462890625, "tokens_rate.above_band": 0.9288256227758007, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0711743772241993 }, { "epoch": 0.812001329787234, "grad_norm": 101.85302100317311, "learning_rate": 1.8655112463384573e-07, "loss": 0.4437, "step": 4885, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4547770700636943, "success_rate.epoch.env.math": 0.904390243902439, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.7646547980894486, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5013461997769717, "success_rate.epoch.global": 0.705395935529082, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9921573359073359, "tokens_p.mean_in_band": 0.6309742647058824, "tokens_rate.above_band": 0.9384057971014492, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06159420289855073 }, { "epoch": 0.8128324468085106, "grad_norm": 81.62243492391613, "learning_rate": 1.8652594474363616e-07, "loss": 0.4112, "step": 4890, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4541984732824427, "success_rate.epoch.env.math": 0.9044834307992202, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.7649609713790113, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5013299055410709, "success_rate.epoch.global": 0.7055858882822343, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976607270773639, "tokens_p.mean_in_band": 0.6684283088235294, "tokens_rate.above_band": 0.9480475382003396, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05195246179966044 }, { "epoch": 0.8136635638297872, "grad_norm": 79.96352676025562, "learning_rate": 1.865007532714735e-07, "loss": 0.5706, "step": 4895, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.4541984732824427, "success_rate.epoch.env.math": 0.9044834307992202, "success_rate.epoch.env.sat": 0.10631229235880399, "success_rate.epoch.env.science": 0.764948006932409, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5015562513685872, "success_rate.epoch.global": 0.7055532242271646, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5599999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.994770749665328, "tokens_p.mean_in_band": 0.6636458333333334, "tokens_rate.above_band": 0.9521988527724665, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04780114722753346 }, { "epoch": 0.8144946808510638, "grad_norm": 57.590716677674045, "learning_rate": 1.8647555026206637e-07, "loss": 0.3592, "step": 4900, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.4541984732824427, "success_rate.epoch.env.math": 0.9045764362220059, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.7652023371564597, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5018568483201181, "success_rate.epoch.global": 0.7058412520961431, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9909695817490495, "tokens_p.mean_in_band": 0.8050130208333334, "tokens_rate.above_band": 0.9163763066202091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08362369337979095 }, { "epoch": 0.8153257978723404, "grad_norm": 71.91926404585581, "learning_rate": 1.8645033576014394e-07, "loss": 0.4018, "step": 4905, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.4548919949174079, "success_rate.epoch.env.math": 0.9045764362220059, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.7653039152065758, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.501929130109671, "success_rate.epoch.global": 0.7059645201843833, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937782805429864, "tokens_p.mean_below_band": 2.7830537874251604e-10, "tokens_p.mean_in_band": 0.8141741071428571, "tokens_rate.above_band": 0.9650655021834061, "tokens_rate.below_band": 0.004366812227074236, "tokens_rate.in_band": 0.03056768558951965 }, { "epoch": 0.816156914893617, "grad_norm": 274.7525688377167, "learning_rate": 1.8642510981045584e-07, "loss": 0.3523, "step": 4910, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4457831325301205, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45558375634517767, "success_rate.epoch.env.math": 0.9046692607003891, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.7654054054054054, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5015154661496692, "success_rate.epoch.global": 0.7059316120027913, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947382869692533, "tokens_p.mean_in_band": 0.7389705882352942, "tokens_rate.above_band": 0.9757142857142858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024285714285714285 }, { "epoch": 0.8169880319148937, "grad_norm": 52.93996123958456, "learning_rate": 1.863998724577719e-07, "loss": 0.4959, "step": 4915, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4491017964071856, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.4550063371356147, "success_rate.epoch.env.math": 0.9048543689320389, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.765290685109142, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5017710691135683, "success_rate.epoch.global": 0.7058987588899736, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.8125, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0, "tokens_p.mean_below_band": 1.7229467630386353e-08, "tokens_p.mean_in_band": 0.44343171296296297, "tokens_rate.above_band": 0.9700214132762313, "tokens_rate.below_band": 0.0010706638115631692, "tokens_rate.in_band": 0.028907922912205567 }, { "epoch": 0.8178191489361702, "grad_norm": 470.3258572603504, "learning_rate": 1.8637462374688222e-07, "loss": 0.3963, "step": 4920, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4491017964071856, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45569620253164556, "success_rate.epoch.env.math": 0.9049466537342385, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.7652774778665515, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5018409730186264, "success_rate.epoch.global": 0.7060052946913752, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.990625, "tokens_p.mean_in_band": 0.7449776785714286, "tokens_rate.above_band": 0.9554140127388535, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044585987261146494 }, { "epoch": 0.8186502659574468, "grad_norm": 222.2564187965822, "learning_rate": 1.863493637225971e-07, "loss": 0.5047, "step": 4925, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4491017964071856, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.35294117647058826, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45512010113780027, "success_rate.epoch.env.math": 0.9049466537342385, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7654294346137246, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5017696296789563, "success_rate.epoch.global": 0.7059314954051796, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9998148148148148, "tokens_p.mean_in_band": 0.6492686170212766, "tokens_rate.above_band": 0.9349030470914127, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06509695290858726 }, { "epoch": 0.8194813829787234, "grad_norm": 146.8123613003801, "learning_rate": 1.8632409242974694e-07, "loss": 0.5037, "step": 4930, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45512010113780027, "success_rate.epoch.env.math": 0.9050387596899225, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7654666954084932, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5002969641868622, "success_rate.epoch.global": 0.705980528511822, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9928764559068219, "tokens_p.mean_in_band": 0.7292317708333333, "tokens_rate.above_band": 0.9524564183835182, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04754358161648178 }, { "epoch": 0.8203125, "grad_norm": 97.59546532651102, "learning_rate": 1.86298809913182e-07, "loss": 0.3214, "step": 4935, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.4552332912988651, "success_rate.epoch.env.math": 0.9051306873184899, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7654028436018957, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5003098065489562, "success_rate.epoch.global": 0.7059477487493052, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.7000000000000001, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.995628612716763, "tokens_p.mean_in_band": 0.6509650735294118, "tokens_rate.above_band": 0.9621802002224694, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03781979977753059 }, { "epoch": 0.8211436170212766, "grad_norm": 82.8880972909026, "learning_rate": 1.8627351621777263e-07, "loss": 0.4876, "step": 4940, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45591939546599497, "success_rate.epoch.env.math": 0.9052224371373307, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7655038759689923, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5003897053083259, "success_rate.epoch.global": 0.7061111111111111, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959016393442623, "tokens_p.mean_in_band": 0.6470170454545454, "tokens_rate.above_band": 0.9682539682539683, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031746031746031744 }, { "epoch": 0.8219747340425532, "grad_norm": 103.36121219849849, "learning_rate": 1.862482113884089e-07, "loss": 0.3889, "step": 4945, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45591939546599497, "success_rate.epoch.env.math": 0.9053140096618357, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7654400688616312, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5003922294371572, "success_rate.epoch.global": 0.7061354802887285, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9903694968553459, "tokens_p.mean_below_band": 1.1641532182693481e-10, "tokens_p.mean_in_band": 0.7599113805970149, "tokens_rate.above_band": 0.9034090909090909, "tokens_rate.below_band": 0.0014204545454545455, "tokens_rate.in_band": 0.09517045454545454 }, { "epoch": 0.8228058510638298, "grad_norm": 78.78916167231668, "learning_rate": 1.8622289547000072e-07, "loss": 0.4149, "step": 4950, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45534591194968554, "success_rate.epoch.env.math": 0.9053140096618357, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7656417974629112, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5003584335357909, "success_rate.epoch.global": 0.706200582605077, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0005203996669443, "tokens_p.mean_in_band": 0.6318359375, "tokens_rate.above_band": 0.9615692554043235, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03843074459567654 }, { "epoch": 0.8236369680851063, "grad_norm": 164.92010111231042, "learning_rate": 1.8619756850747761e-07, "loss": 0.5795, "step": 4955, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45603015075376885, "success_rate.epoch.env.math": 0.9053140096618357, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.765843179377014, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5004389445101715, "success_rate.epoch.global": 0.7064042140282784, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9924679487179487, "tokens_p.mean_in_band": 0.5889559659090909, "tokens_rate.above_band": 0.8986175115207373, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10138248847926268 }, { "epoch": 0.824468085106383, "grad_norm": 227.3258338514629, "learning_rate": 1.8617223054578872e-07, "loss": 0.4598, "step": 4960, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45603015075376885, "success_rate.epoch.env.math": 0.9053140096618357, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.76594374060554, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5004480864400375, "success_rate.epoch.global": 0.7064855875831486, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9930638647921028, "tokens_p.mean_in_band": 0.5413041893468118, "tokens_rate.above_band": 0.8386853988961365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16131460110386353 }, { "epoch": 0.8252992021276596, "grad_norm": 118.00463653645956, "learning_rate": 1.8614688162990275e-07, "loss": 0.5387, "step": 4965, "success_rate.epoch.env.abd": 0.4444444444444444, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45671267252195735, "success_rate.epoch.env.math": 0.9053140096618357, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.7660442154968877, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5005192679545408, "success_rate.epoch.global": 0.7066075633744285, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955357142857143, "tokens_p.mean_in_band": 0.626953125, "tokens_rate.above_band": 0.9752808988764045, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024719101123595506 }, { "epoch": 0.8261303191489362, "grad_norm": 223.9414092082225, "learning_rate": 1.861215218048078e-07, "loss": 0.5116, "step": 4970, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45671267252195735, "success_rate.epoch.env.math": 0.9053140096618357, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.766144604162197, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5016263300773862, "success_rate.epoch.global": 0.7067294378288562, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971168154761905, "tokens_p.mean_in_band": 0.5672743055555556, "tokens_rate.above_band": 0.9613733905579399, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03862660944206009 }, { "epoch": 0.8269614361702128, "grad_norm": 101.33458403531469, "learning_rate": 1.8609615111551138e-07, "loss": 0.3921, "step": 4975, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45807259073842305, "success_rate.epoch.env.math": 0.9053140096618357, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.766295025728988, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.501763633694046, "success_rate.epoch.global": 0.7069323370693233, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973370927318296, "tokens_p.mean_in_band": 0.5277901785714286, "tokens_rate.above_band": 0.957983193277311, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04201680672268908 }, { "epoch": 0.8277925531914894, "grad_norm": 41.12422575372285, "learning_rate": 1.8607076960704017e-07, "loss": 0.4036, "step": 4980, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45807259073842305, "success_rate.epoch.env.math": 0.9054054054054054, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.7663951993141878, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5017161245623311, "success_rate.epoch.global": 0.706858407079646, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9942354368932039, "tokens_p.mean_in_band": 0.5442073170731707, "tokens_rate.above_band": 0.9094922737306843, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09050772626931568 }, { "epoch": 0.828623670212766, "grad_norm": 89.70388854311008, "learning_rate": 1.8604537732444023e-07, "loss": 0.369, "step": 4985, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5028571428571429, "success_rate.epoch.env.logic": 0.45807259073842305, "success_rate.epoch.env.math": 0.905587668593449, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.7664452539104349, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5017372443609028, "success_rate.epoch.global": 0.7069799585348998, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963533225283631, "tokens_p.mean_in_band": 0.6153273809523809, "tokens_rate.above_band": 0.9670846394984326, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032915360501567396 }, { "epoch": 0.8294547872340425, "grad_norm": 128.28428517730092, "learning_rate": 1.8601997431277656e-07, "loss": 0.489, "step": 4990, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.45807259073842305, "success_rate.epoch.env.math": 0.9056785370548605, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.7662170841361593, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5019815510892494, "success_rate.epoch.global": 0.7069465543433228, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9941099476439791, "tokens_p.mean_in_band": 0.608203125, "tokens_rate.above_band": 0.9744897959183674, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025510204081632654 }, { "epoch": 0.8302859042553191, "grad_norm": 115.69334908785314, "learning_rate": 1.8599456061713331e-07, "loss": 0.3986, "step": 4995, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.45807259073842305, "success_rate.epoch.env.math": 0.9056785370548605, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.766367137355584, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5019951922910152, "success_rate.epoch.global": 0.707067918277195, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933850364963503, "tokens_p.mean_in_band": 0.6473958333333333, "tokens_rate.above_band": 0.9480968858131488, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05190311418685121 }, { "epoch": 0.8311170212765957, "grad_norm": 421.5381623064106, "learning_rate": 1.8596913628261358e-07, "loss": 0.4485, "step": 5000, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.45692883895131087, "success_rate.epoch.env.math": 0.9058597502401537, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.766303185802865, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5019018750042391, "success_rate.epoch.global": 0.7069369742104538, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9964048672566371, "tokens_p.mean_in_band": 0.634265988372093, "tokens_rate.above_band": 0.9484412470023981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05155875299760192 }, { "epoch": 0.8319481382978723, "grad_norm": 96.13394926760594, "learning_rate": 1.8594370135433933e-07, "loss": 0.4203, "step": 5005, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.45692883895131087, "success_rate.epoch.env.math": 0.9059500959692899, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.7665028839991455, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5019282426338224, "success_rate.epoch.global": 0.7071389195148843, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.984375, "tokens_p.mean_in_band": 0.7435546875, "tokens_rate.above_band": 0.8701298701298701, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12987012987012986 }, { "epoch": 0.832779255319149, "grad_norm": 69.38275752445975, "learning_rate": 1.8591825587745129e-07, "loss": 0.3719, "step": 5010, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.45692883895131087, "success_rate.epoch.env.math": 0.9059500959692899, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.7667520273154076, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5019508920262099, "success_rate.epoch.global": 0.7073405866960474, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949919871794872, "tokens_p.mean_in_band": 0.79638671875, "tokens_rate.above_band": 0.975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025 }, { "epoch": 0.8336103723404256, "grad_norm": 94.78114760423331, "learning_rate": 1.8589279989710897e-07, "loss": 0.4333, "step": 5015, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.45692883895131087, "success_rate.epoch.env.math": 0.9060402684563759, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.7670006395224899, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5019816906347706, "success_rate.epoch.global": 0.7075822209990368, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9900568181818182, "tokens_p.mean_in_band": 0.54130859375, "tokens_rate.above_band": 0.8979591836734694, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10204081632653061 }, { "epoch": 0.8344414893617021, "grad_norm": 44.62805235217876, "learning_rate": 1.8586733345849054e-07, "loss": 0.3602, "step": 5020, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.45692883895131087, "success_rate.epoch.env.math": 0.9060402684563759, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.7672982754949969, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.50197660444531, "success_rate.epoch.global": 0.7077261479241133, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9946581196581197, "tokens_p.mean_in_band": 0.7126302083333333, "tokens_rate.above_band": 0.8863636363636364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11363636363636363 }, { "epoch": 0.8352726063829787, "grad_norm": 70.84938653094493, "learning_rate": 1.858418566067926e-07, "loss": 0.4653, "step": 5025, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.45760598503740646, "success_rate.epoch.env.math": 0.9061302681992337, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.7675951520306188, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5020733337511805, "success_rate.epoch.global": 0.7080472397692942, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938568376068376, "tokens_p.mean_in_band": 0.551513671875, "tokens_rate.above_band": 0.936, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.064 }, { "epoch": 0.8361037234042553, "grad_norm": 66.59565573097711, "learning_rate": 1.8581636938723042e-07, "loss": 0.3828, "step": 5030, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.45828144458281445, "success_rate.epoch.env.math": 0.9061302681992337, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.7677433064173396, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5021482077450105, "success_rate.epoch.global": 0.7082075212736756, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945600381194409, "tokens_p.mean_in_band": 0.6614583333333334, "tokens_rate.above_band": 0.9493365500603136, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05066344993968637 }, { "epoch": 0.8369348404255319, "grad_norm": 92.17755944276485, "learning_rate": 1.8579087184503753e-07, "loss": 0.2834, "step": 5035, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5056818181818182, "success_rate.epoch.env.logic": 0.458955223880597, "success_rate.epoch.env.math": 0.9063097514340345, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.7677433064173396, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5022257770661542, "success_rate.epoch.global": 0.7083276169570586, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973148994515539, "tokens_p.mean_in_band": 0.7312677556818182, "tokens_rate.above_band": 0.961335676625659, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03866432337434095 }, { "epoch": 0.8377659574468085, "grad_norm": 63.12521647716537, "learning_rate": 1.8576536402546577e-07, "loss": 0.3666, "step": 5040, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5084745762711864, "success_rate.epoch.env.logic": 0.458955223880597, "success_rate.epoch.env.math": 0.9063097514340345, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.7677282377919321, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5024782942901508, "success_rate.epoch.global": 0.7083904579106115, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9900045955882353, "tokens_p.mean_in_band": 0.8170955882352942, "tokens_rate.above_band": 0.9411764705882353, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.058823529411764705 }, { "epoch": 0.8385970744680851, "grad_norm": 132.14260333855555, "learning_rate": 1.8573984597378533e-07, "loss": 0.5016, "step": 5045, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5084745762711864, "success_rate.epoch.env.logic": 0.458955223880597, "success_rate.epoch.env.math": 0.9063097514340345, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.7680237489397794, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5025051589399551, "success_rate.epoch.global": 0.7086301369863014, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.991234756097561, "tokens_p.mean_below_band": 6.927791673660977e-13, "tokens_p.mean_in_band": 0.5634014423076923, "tokens_rate.above_band": 0.9213483146067416, "tokens_rate.below_band": 0.0056179775280898875, "tokens_rate.in_band": 0.07303370786516854 }, { "epoch": 0.8394281914893617, "grad_norm": 132.24654694480157, "learning_rate": 1.857143177352845e-07, "loss": 0.4856, "step": 5050, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5084745762711864, "success_rate.epoch.env.logic": 0.45962732919254656, "success_rate.epoch.env.math": 0.9063097514340345, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7683185091063108, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5025611212036917, "success_rate.epoch.global": 0.7088122605363985, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965122767857143, "tokens_p.mean_in_band": 0.737890625, "tokens_rate.above_band": 0.9180327868852459, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08196721311475409 }, { "epoch": 0.8402593085106383, "grad_norm": 82.75381195497917, "learning_rate": 1.8568877935526963e-07, "loss": 0.4736, "step": 5055, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5084745762711864, "success_rate.epoch.env.logic": 0.45962732919254656, "success_rate.epoch.env.math": 0.9063097514340345, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7685635709752485, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5025833995554133, "success_rate.epoch.global": 0.7090113496513059, "success_rate.window.env.science": 0.9, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9811126373626373, "tokens_p.mean_in_band": 0.6964285714285714, "tokens_rate.above_band": 0.8666666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13333333333333333 }, { "epoch": 0.8410904255319149, "grad_norm": 110.18599328143114, "learning_rate": 1.8566323087906509e-07, "loss": 0.3668, "step": 5060, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4556213017751479, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5084745762711864, "success_rate.epoch.env.logic": 0.45962732919254656, "success_rate.epoch.env.math": 0.9063992359121299, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7682875264270613, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5028610167666954, "success_rate.epoch.global": 0.7089368679967204, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9963842975206612, "tokens_p.mean_below_band": 5.893525667488575e-10, "tokens_p.mean_in_band": 0.6242327008928571, "tokens_rate.above_band": 0.9615894039735099, "tokens_rate.below_band": 0.0013245033112582781, "tokens_rate.in_band": 0.03708609271523179 }, { "epoch": 0.8419215425531915, "grad_norm": 81.69390817781417, "learning_rate": 1.8563767235201317e-07, "loss": 0.4711, "step": 5065, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4556213017751479, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5084745762711864, "success_rate.epoch.env.logic": 0.45905707196029777, "success_rate.epoch.env.math": 0.9064885496183206, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7684343967885062, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5028306464790033, "success_rate.epoch.global": 0.7089990441076062, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980127186009539, "tokens_p.mean_in_band": 0.5041852678571429, "tokens_rate.above_band": 0.9573820395738204, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0426179604261796 }, { "epoch": 0.8427526595744681, "grad_norm": 102.2999888297453, "learning_rate": 1.8561210381947397e-07, "loss": 0.527, "step": 5070, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4556213017751479, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5084745762711864, "success_rate.epoch.env.logic": 0.4591584158415842, "success_rate.epoch.env.math": 0.9064885496183206, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.768581081081081, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5028531944948088, "success_rate.epoch.global": 0.709061135371179, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4523809523809524, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9962698938992043, "tokens_p.mean_in_band": 0.6032072368421053, "tokens_rate.above_band": 0.952020202020202, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047979797979797977 }, { "epoch": 0.8435837765957447, "grad_norm": 134.25351070844295, "learning_rate": 1.8558652532682544e-07, "loss": 0.4385, "step": 5075, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4588235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5084745762711864, "success_rate.epoch.env.logic": 0.4591584158415842, "success_rate.epoch.env.math": 0.9065776930409915, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.768629934557737, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5031568512708039, "success_rate.epoch.global": 0.7091801936979948, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999122920997921, "tokens_p.mean_in_band": 0.5834517045454546, "tokens_rate.above_band": 0.9776422764227642, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022357723577235773 }, { "epoch": 0.8444148936170213, "grad_norm": 102.71658622404625, "learning_rate": 1.8556093691946306e-07, "loss": 0.3784, "step": 5080, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4588235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5084745762711864, "success_rate.epoch.env.logic": 0.4591584158415842, "success_rate.epoch.env.math": 0.9065776930409915, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7687763713080169, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5031701637026474, "success_rate.epoch.global": 0.7092991546223071, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9897103658536586, "tokens_p.mean_below_band": 8.149072527885437e-09, "tokens_p.mean_in_band": 0.793212890625, "tokens_rate.above_band": 0.9060773480662984, "tokens_rate.below_band": 0.0055248618784530384, "tokens_rate.in_band": 0.08839779005524862 }, { "epoch": 0.8452460106382979, "grad_norm": 150.3481594804001, "learning_rate": 1.8553533864280002e-07, "loss": 0.4964, "step": 5085, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4588235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.45982694684796044, "success_rate.epoch.env.math": 0.9065776930409915, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7687117857895847, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5034761022729145, "success_rate.epoch.global": 0.7093609483580869, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939478584729982, "tokens_p.mean_below_band": 2.9976945370435715e-09, "tokens_p.mean_in_band": 0.7272518382352942, "tokens_rate.above_band": 0.9675675675675676, "tokens_rate.below_band": 0.0018018018018018018, "tokens_rate.in_band": 0.03063063063063063 }, { "epoch": 0.8460771276595744, "grad_norm": 120.21214009748208, "learning_rate": 1.8550973054226697e-07, "loss": 0.3844, "step": 5090, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4588235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4599260172626387, "success_rate.epoch.env.math": 0.9065776930409915, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7688579856721449, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5034983995726635, "success_rate.epoch.global": 0.7094226579520697, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9953583645994262, "tokens_p.mean_in_band": 0.5595965038314177, "tokens_rate.above_band": 0.896695032653869, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10330496734613101 }, { "epoch": 0.846908244680851, "grad_norm": 58.516514937432355, "learning_rate": 1.8548411266331206e-07, "loss": 0.46, "step": 5095, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4588235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.45935960591133007, "success_rate.epoch.env.math": 0.9066666666666666, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.768955349620893, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5034638474111284, "success_rate.epoch.global": 0.7094447468698966, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9934966216216217, "tokens_p.mean_in_band": 0.5916108630952381, "tokens_rate.above_band": 0.9462915601023018, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05370843989769821 }, { "epoch": 0.8477393617021277, "grad_norm": 72.39119441768321, "learning_rate": 1.854584850514007e-07, "loss": 0.5245, "step": 5100, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4588235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.46002460024600245, "success_rate.epoch.env.math": 0.9066666666666666, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7690526315789473, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5035331452559219, "success_rate.epoch.global": 0.7095633247177254, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983231707317073, "tokens_p.mean_in_band": 0.5803865131578947, "tokens_rate.above_band": 0.9700315457413249, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02996845425867508 }, { "epoch": 0.8485704787234043, "grad_norm": 64.96241202138164, "learning_rate": 1.8543284775201564e-07, "loss": 0.5563, "step": 5105, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.4588235294117647, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.46002460024600245, "success_rate.epoch.env.math": 0.9067554709800191, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7692469499368952, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5035588836805854, "success_rate.epoch.global": 0.7097607395323545, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969512195121951, "tokens_p.mean_in_band": 0.5854779411764706, "tokens_rate.above_band": 0.9786432160804021, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02135678391959799 }, { "epoch": 0.8494015957446809, "grad_norm": 113.45650888270792, "learning_rate": 1.8540720081065682e-07, "loss": 0.5288, "step": 5110, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4594594594594595, "success_rate.epoch.env.math": 0.9068441064638784, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7693439865433137, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5030393722246758, "success_rate.epoch.global": 0.7095897853844064, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982290436835891, "tokens_p.mean_in_band": 0.6148302801724138, "tokens_rate.above_band": 0.9668949771689498, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033105022831050226 }, { "epoch": 0.8502327127659575, "grad_norm": 81.0519607409086, "learning_rate": 1.8538154427284125e-07, "loss": 0.4576, "step": 5115, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4594594594594595, "success_rate.epoch.env.math": 0.9068441064638784, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.7694893885270014, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.503052590586829, "success_rate.epoch.global": 0.7097080787508486, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998593009478673, "tokens_p.mean_in_band": 0.5876116071428571, "tokens_rate.above_band": 0.9678899082568807, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03211009174311927 }, { "epoch": 0.851063829787234, "grad_norm": 86.13391690307799, "learning_rate": 1.8535587818410302e-07, "loss": 0.4625, "step": 5120, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4588957055214724, "success_rate.epoch.env.math": 0.9069325735992403, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.7697313182199832, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.50299964907371, "success_rate.epoch.global": 0.709751797097518, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9981720055710307, "tokens_p.mean_in_band": 0.6243106617647058, "tokens_rate.above_band": 0.9547872340425532, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04521276595744681 }, { "epoch": 0.8518949468085106, "grad_norm": 68.38127916349538, "learning_rate": 1.8533020258999317e-07, "loss": 0.5685, "step": 5125, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4588957055214724, "success_rate.epoch.env.math": 0.9069325735992403, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7697147651006712, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5029666223684995, "success_rate.epoch.global": 0.7096774193548387, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9974180911680912, "tokens_p.mean_below_band": 5.2386898864026366e-09, "tokens_p.mean_in_band": 0.5706129807692307, "tokens_rate.above_band": 0.9448183041722745, "tokens_rate.below_band": 0.0026917900403768506, "tokens_rate.in_band": 0.052489905787348586 }, { "epoch": 0.8527260638297872, "grad_norm": 129.56825701233763, "learning_rate": 1.8530451753607953e-07, "loss": 0.5243, "step": 5130, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.45955882352941174, "success_rate.epoch.env.math": 0.9070208728652751, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7696499685600503, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5030290424351681, "success_rate.epoch.global": 0.7097385886496005, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.65, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 1.0005116959064329, "tokens_p.mean_below_band": 2.342858351767063e-09, "tokens_p.mean_in_band": 0.5967548076923077, "tokens_rate.above_band": 0.9416299559471366, "tokens_rate.below_band": 0.0011013215859030838, "tokens_rate.in_band": 0.05726872246696035 }, { "epoch": 0.8535571808510638, "grad_norm": 39.98947624230619, "learning_rate": 1.8527882306794683e-07, "loss": 0.3879, "step": 5135, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4589963280293758, "success_rate.epoch.env.math": 0.9072847682119205, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7697464906767232, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5030106717045574, "success_rate.epoch.global": 0.7098782138024358, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973868843069874, "tokens_p.mean_in_band": 0.5791015625, "tokens_rate.above_band": 0.9646408839779006, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03535911602209945 }, { "epoch": 0.8543882978723404, "grad_norm": 51.62159186025119, "learning_rate": 1.8525311923119643e-07, "loss": 0.5168, "step": 5140, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4589963280293758, "success_rate.epoch.env.math": 0.9073724007561437, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7698429319371728, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5030274056868002, "success_rate.epoch.global": 0.7099959421073989, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942748091603053, "tokens_p.mean_in_band": 0.6163194444444444, "tokens_rate.above_band": 0.966789667896679, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033210332103321034 }, { "epoch": 0.855219414893617, "grad_norm": 42.92293315231294, "learning_rate": 1.8522740607144637e-07, "loss": 0.2993, "step": 5145, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.45965770171149145, "success_rate.epoch.env.math": 0.9074598677998111, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7700355722954593, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.503112994876261, "success_rate.epoch.global": 0.7102311123124747, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954166666666666, "tokens_p.mean_in_band": 0.6368117559523809, "tokens_rate.above_band": 0.946969696969697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05303030303030303 }, { "epoch": 0.8560505319148937, "grad_norm": 111.22718262520938, "learning_rate": 1.8520168363433115e-07, "loss": 0.5233, "step": 5150, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4603174603174603, "success_rate.epoch.env.math": 0.9074598677998111, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.7701798410706817, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5031860882745514, "success_rate.epoch.global": 0.7103876806699987, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9918067655867139, "tokens_p.mean_below_band": 5.885958671569824e-07, "tokens_p.mean_in_band": 0.529109733468286, "tokens_rate.above_band": 0.8317481884057971, "tokens_rate.below_band": 0.0004528985507246377, "tokens_rate.in_band": 0.16779891304347827 }, { "epoch": 0.8568816489361702, "grad_norm": 77.42124874726176, "learning_rate": 1.8517595196550186e-07, "loss": 0.3371, "step": 5155, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4609756097560976, "success_rate.epoch.env.math": 0.9075471698113208, "success_rate.epoch.env.sat": 0.1064516129032258, "success_rate.epoch.env.science": 0.7703719180944422, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5032399996265767, "success_rate.epoch.global": 0.7105263157894737, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937796208530806, "tokens_p.mean_in_band": 0.6525493421052632, "tokens_rate.above_band": 0.9173913043478261, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08260869565217391 }, { "epoch": 0.8577127659574468, "grad_norm": 75.49665057538526, "learning_rate": 1.8515021111062588e-07, "loss": 0.5233, "step": 5160, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.4609756097560976, "success_rate.epoch.env.math": 0.9077212806026366, "success_rate.epoch.env.sat": 0.1064516129032258, "success_rate.epoch.env.science": 0.7703719180944422, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5032558278803326, "success_rate.epoch.global": 0.710604425256341, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9991899727148704, "tokens_p.mean_below_band": 1.7848833522293717e-11, "tokens_p.mean_in_band": 0.5237723214285714, "tokens_rate.above_band": 0.953185955786736, "tokens_rate.below_band": 0.0013003901170351106, "tokens_rate.in_band": 0.045513654096228866 }, { "epoch": 0.8585438829787234, "grad_norm": 100.5683597417154, "learning_rate": 1.8512446111538692e-07, "loss": 0.4473, "step": 5165, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3333333333333333, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5112359550561798, "success_rate.epoch.env.logic": 0.46163215590742995, "success_rate.epoch.env.math": 0.9078947368421053, "success_rate.epoch.env.sat": 0.1064516129032258, "success_rate.epoch.env.science": 0.7705157652954688, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5033443596614079, "success_rate.epoch.global": 0.7108385009436505, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.575, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9922945205479452, "tokens_p.mean_in_band": 0.5183012937317785, "tokens_rate.above_band": 0.8469433288710397, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1530566711289603 }, { "epoch": 0.859375, "grad_norm": 59.43381072575893, "learning_rate": 1.8509870202548493e-07, "loss": 0.5055, "step": 5170, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5083798882681564, "success_rate.epoch.env.logic": 0.46163215590742995, "success_rate.epoch.env.math": 0.907981220657277, "success_rate.epoch.env.sat": 0.1064516129032258, "success_rate.epoch.env.science": 0.7706594323873122, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5062954326993385, "success_rate.epoch.global": 0.7109375, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.992715307364577, "tokens_p.mean_below_band": 2.426760537283761e-07, "tokens_p.mean_in_band": 0.43470652829935624, "tokens_rate.above_band": 0.7777514792899408, "tokens_rate.below_band": 0.0016568047337278107, "tokens_rate.in_band": 0.22059171597633137 }, { "epoch": 0.8602061170212766, "grad_norm": 110.38208457277385, "learning_rate": 1.8507293388663595e-07, "loss": 0.418, "step": 5175, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5055555555555555, "success_rate.epoch.env.logic": 0.46228710462287104, "success_rate.epoch.env.math": 0.9080675422138836, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7707072814521176, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5060792961975085, "success_rate.epoch.global": 0.7108628348364517, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9945952316810345, "tokens_p.mean_below_band": 1.862645149230957e-07, "tokens_p.mean_in_band": 0.528577302631579, "tokens_rate.above_band": 0.9066927210552027, "tokens_rate.below_band": 0.0004885197850512946, "tokens_rate.in_band": 0.09281875915974597 }, { "epoch": 0.8610372340425532, "grad_norm": 401.48654276649995, "learning_rate": 1.8504715674457215e-07, "loss": 0.4859, "step": 5180, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5055555555555555, "success_rate.epoch.env.logic": 0.4629404617253949, "success_rate.epoch.env.math": 0.9081537019681349, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7708507089241035, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5061595638637594, "success_rate.epoch.global": 0.7110573042776432, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9908588435374149, "tokens_p.mean_in_band": 0.75640625, "tokens_rate.above_band": 0.9216300940438872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07836990595611286 }, { "epoch": 0.8618683510638298, "grad_norm": 69.59480462354695, "learning_rate": 1.850213706450416e-07, "loss": 0.5222, "step": 5185, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5055555555555555, "success_rate.epoch.env.logic": 0.4629404617253949, "success_rate.epoch.env.math": 0.9082397003745318, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7707378074197583, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5061571181275822, "success_rate.epoch.global": 0.7110393976065618, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9992788461538461, "tokens_p.mean_in_band": 0.5450846354166666, "tokens_rate.above_band": 0.9605911330049262, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03940886699507389 }, { "epoch": 0.8626994680851063, "grad_norm": 83.24604199451593, "learning_rate": 1.8499557563380836e-07, "loss": 0.3598, "step": 5190, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5055555555555555, "success_rate.epoch.env.logic": 0.4629404617253949, "success_rate.epoch.env.math": 0.9082397003745318, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7708810664444907, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5061701416752853, "success_rate.epoch.global": 0.7111559139784946, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964779896759393, "tokens_p.mean_below_band": 1.6977234433094661e-09, "tokens_p.mean_in_band": 0.754404920212766, "tokens_rate.above_band": 0.9858637263217416, "tokens_rate.below_band": 0.0008481764206955047, "tokens_rate.in_band": 0.013288097257562907 }, { "epoch": 0.863530585106383, "grad_norm": 66.86258879119843, "learning_rate": 1.8496977175665221e-07, "loss": 0.4882, "step": 5195, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5055555555555555, "success_rate.epoch.env.logic": 0.46237864077669905, "success_rate.epoch.env.math": 0.9083255378858747, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7711670480549199, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5061528687819286, "success_rate.epoch.global": 0.7113319011815252, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966517857142857, "tokens_p.mean_in_band": 0.6098845108695652, "tokens_rate.above_band": 0.9655172413793104, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034482758620689655 }, { "epoch": 0.8643617021276596, "grad_norm": 461.39987416908787, "learning_rate": 1.8494395905936875e-07, "loss": 0.4995, "step": 5200, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5082872928176796, "success_rate.epoch.env.logic": 0.46237864077669905, "success_rate.epoch.env.math": 0.9085820895522388, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7711018711018711, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5064186062342412, "success_rate.epoch.global": 0.7114688128772636, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.6958705357142857, "tokens_rate.above_band": 0.954983922829582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04501607717041801 }, { "epoch": 0.8651928191489362, "grad_norm": 62.920761833154074, "learning_rate": 1.8491813758776917e-07, "loss": 0.4371, "step": 5205, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5082872928176796, "success_rate.epoch.env.logic": 0.463030303030303, "success_rate.epoch.env.math": 0.9086672879776329, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7711318795430945, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5064883216088067, "success_rate.epoch.global": 0.7116054677030287, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9938340807174888, "tokens_p.mean_in_band": 0.6256696428571429, "tokens_rate.above_band": 0.8643410852713178, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13565891472868216 }, { "epoch": 0.8660239361702128, "grad_norm": 112.23362344477916, "learning_rate": 1.8489230738768027e-07, "loss": 0.363, "step": 5210, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45348837209302323, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5082872928176796, "success_rate.epoch.env.logic": 0.463680387409201, "success_rate.epoch.env.math": 0.9087523277467412, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7712269047124767, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5065637897285694, "success_rate.epoch.global": 0.7117599785695151, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99481658692185, "tokens_p.mean_in_band": 0.5685177364864865, "tokens_rate.above_band": 0.9713400464756003, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02865995352439969 }, { "epoch": 0.8668550531914894, "grad_norm": 120.54854830860351, "learning_rate": 1.848664685049443e-07, "loss": 0.3702, "step": 5215, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45664739884393063, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5082872928176796, "success_rate.epoch.env.logic": 0.463680387409201, "success_rate.epoch.env.math": 0.9088372093023256, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7714641227706346, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5068802557617194, "success_rate.epoch.global": 0.7120299745751372, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99906600249066, "tokens_p.mean_in_band": 0.5454427083333333, "tokens_rate.above_band": 0.963985594237695, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03601440576230492 }, { "epoch": 0.867686170212766, "grad_norm": 93.79875126033119, "learning_rate": 1.8484062098541897e-07, "loss": 0.3146, "step": 5220, "success_rate.epoch.env.abd": 0.45652173913043476, "success_rate.epoch.env.agentgym:alfworld": 0.45664739884393063, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5082872928176796, "success_rate.epoch.env.logic": 0.463680387409201, "success_rate.epoch.env.math": 0.9089219330855018, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7717008493888544, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5069094785254826, "success_rate.epoch.global": 0.7122609974595534, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9837682038834952, "tokens_p.mean_in_band": 0.8057291666666667, "tokens_rate.above_band": 0.8728813559322034, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1271186440677966 }, { "epoch": 0.8685172872340425, "grad_norm": 96.17477032570376, "learning_rate": 1.848147648749773e-07, "loss": 0.4337, "step": 5225, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.45664739884393063, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5082872928176796, "success_rate.epoch.env.logic": 0.463680387409201, "success_rate.epoch.env.math": 0.9089219330855018, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7718898778720762, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5060436421610358, "success_rate.epoch.global": 0.7123196151790486, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9954633204633204, "tokens_p.mean_in_band": 0.10686657475490197, "tokens_rate.above_band": 0.28405352050888355, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.7159464794911164 }, { "epoch": 0.8693484042553191, "grad_norm": 150.19682517668573, "learning_rate": 1.8478890021950754e-07, "loss": 0.4695, "step": 5230, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.45664739884393063, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5082872928176796, "success_rate.epoch.env.logic": 0.463680387409201, "success_rate.epoch.env.math": 0.9089219330855018, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7720785935884178, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5060607981352487, "success_rate.epoch.global": 0.7124732905982906, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9823863636363637, "tokens_p.mean_in_band": 0.7083333333333334, "tokens_rate.above_band": 0.9016393442622951, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09836065573770492 }, { "epoch": 0.8701795212765957, "grad_norm": 174.023746545335, "learning_rate": 1.847630270649131e-07, "loss": 0.5299, "step": 5235, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.45664739884393063, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5082872928176796, "success_rate.epoch.env.logic": 0.463680387409201, "success_rate.epoch.env.math": 0.9091751621872104, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7721728344014885, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5060923863093195, "success_rate.epoch.global": 0.7126651541438677, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.8571428571428572, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9858156028368794, "tokens_p.mean_below_band": 9.049472282640636e-11, "tokens_p.mean_in_band": 0.7245279947916666, "tokens_rate.above_band": 0.8493975903614458, "tokens_rate.below_band": 0.006024096385542169, "tokens_rate.in_band": 0.14457831325301204 }, { "epoch": 0.8710106382978723, "grad_norm": 145.87926676819896, "learning_rate": 1.847371454571125e-07, "loss": 0.5801, "step": 5240, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.45664739884393063, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5082872928176796, "success_rate.epoch.env.logic": 0.463680387409201, "success_rate.epoch.env.math": 0.9092592592592592, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7719479446395373, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5060795869738738, "success_rate.epoch.global": 0.7125900240064017, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9879658385093167, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9044943820224719, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09550561797752809 }, { "epoch": 0.871841755319149, "grad_norm": 95.14551052244866, "learning_rate": 1.847112554420393e-07, "loss": 0.4015, "step": 5245, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.45977011494252873, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.463768115942029, "success_rate.epoch.env.math": 0.9092592592592592, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7720891824938068, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5066298961245127, "success_rate.epoch.global": 0.7127248500999334, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998802570093458, "tokens_p.mean_in_band": 0.5251885775862069, "tokens_rate.above_band": 0.9736123748862603, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026387625113739762 }, { "epoch": 0.8726728723404256, "grad_norm": 96.40747778330923, "learning_rate": 1.8468535706564186e-07, "loss": 0.4411, "step": 5250, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.46285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.463855421686747, "success_rate.epoch.env.math": 0.9093432007400555, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.7721362229102167, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5069303793569252, "success_rate.epoch.global": 0.7127829560585885, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964145106861643, "tokens_p.mean_in_band": 0.546875, "tokens_rate.above_band": 0.961081081081081, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03891891891891892 }, { "epoch": 0.8735039893617021, "grad_norm": 145.66526206987086, "learning_rate": 1.846594503738835e-07, "loss": 0.335, "step": 5255, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.46285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.463855421686747, "success_rate.epoch.env.math": 0.9093432007400555, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.7722772277227723, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5069122803408292, "success_rate.epoch.global": 0.71280276816609, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937796208530806, "tokens_p.mean_in_band": 0.6166666666666667, "tokens_rate.above_band": 0.9336283185840708, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06637168141592921 }, { "epoch": 0.8743351063829787, "grad_norm": 99.61723336966499, "learning_rate": 1.8463353541274226e-07, "loss": 0.4987, "step": 5260, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.46285714285714286, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.463855421686747, "success_rate.epoch.env.math": 0.9094269870609981, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.7724649629018961, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5069369641135626, "success_rate.epoch.global": 0.7129937491687725, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947277607361963, "tokens_p.mean_in_band": 0.5895724826388888, "tokens_rate.above_band": 0.9476744186046512, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05232558139534884 }, { "epoch": 0.8751662234042553, "grad_norm": 77.30451222916581, "learning_rate": 1.846076122282108e-07, "loss": 0.4781, "step": 5265, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.463855421686747, "success_rate.epoch.env.math": 0.9095940959409594, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.7725587144622992, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5072381285219546, "success_rate.epoch.global": 0.713184476342371, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925814075630253, "tokens_p.mean_in_band": 0.7912946428571429, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 0.8759973404255319, "grad_norm": 95.91580395374355, "learning_rate": 1.8458168086629647e-07, "loss": 0.3916, "step": 5270, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.463855421686747, "success_rate.epoch.env.math": 0.9095940959409594, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.7727459860024701, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5072551532074246, "success_rate.epoch.global": 0.713336875664187, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99375, "tokens_p.mean_in_band": 0.5911931818181818, "tokens_rate.above_band": 0.9508928571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049107142857142856 }, { "epoch": 0.8768284574468085, "grad_norm": 98.98842252879716, "learning_rate": 1.8455574137302107e-07, "loss": 0.4192, "step": 5275, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.46329723225030084, "success_rate.epoch.env.math": 0.9097605893186004, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.7728395061728395, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.507228046308476, "success_rate.epoch.global": 0.7133943979822116, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981737012987013, "tokens_p.mean_in_band": 0.6482638888888889, "tokens_rate.above_band": 0.9319213313161876, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0680786686838124 }, { "epoch": 0.8776595744680851, "grad_norm": 146.98766796897505, "learning_rate": 1.845297937944208e-07, "loss": 0.5282, "step": 5280, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.46274038461538464, "success_rate.epoch.env.math": 0.9098436062557498, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.7729329494035376, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5071934656296515, "success_rate.epoch.global": 0.7134138251293618, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996259842519685, "tokens_p.mean_in_band": 0.4921875, "tokens_rate.above_band": 0.9491778774289985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05082212257100149 }, { "epoch": 0.8784906914893617, "grad_norm": 68.15240195818971, "learning_rate": 1.845038381765463e-07, "loss": 0.5068, "step": 5285, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.46274038461538464, "success_rate.epoch.env.math": 0.9098436062557498, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.7731662214916787, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5072146721831189, "success_rate.epoch.global": 0.7136038186157518, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9921482412060302, "tokens_p.mean_in_band": 0.6516927083333334, "tokens_rate.above_band": 0.8805309734513275, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11946902654867257 }, { "epoch": 0.8793218085106383, "grad_norm": 72.33138589840588, "learning_rate": 1.8447787456546248e-07, "loss": 0.3813, "step": 5290, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.4628297362110312, "success_rate.epoch.env.math": 0.9099264705882353, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7732593961799138, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5072080785241453, "success_rate.epoch.global": 0.7135665076841547, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9920176630434783, "tokens_p.mean_below_band": 1.8189894035458565e-09, "tokens_p.mean_in_band": 0.6848958333333334, "tokens_rate.above_band": 0.9363867684478372, "tokens_rate.below_band": 0.002544529262086514, "tokens_rate.in_band": 0.061068702290076333 }, { "epoch": 0.8801529255319149, "grad_norm": 153.70816550968183, "learning_rate": 1.8445190300724828e-07, "loss": 0.6103, "step": 5295, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.46347305389221555, "success_rate.epoch.env.math": 0.9100917431192661, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7733990147783252, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5072942793251113, "success_rate.epoch.global": 0.7137940164151443, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9928909952606635, "tokens_p.mean_in_band": 0.83984375, "tokens_rate.above_band": 0.985981308411215, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014018691588785047 }, { "epoch": 0.8809840425531915, "grad_norm": 179.64357932032007, "learning_rate": 1.8442592354799694e-07, "loss": 0.5463, "step": 5300, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.46347305389221555, "success_rate.epoch.env.math": 0.9100917431192661, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7733990147783252, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5072942793251113, "success_rate.epoch.global": 0.7137940164151443, "success_rate.window.env.logic": 0.25, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.998100630733945, "tokens_p.mean_in_band": 0.556796875, "tokens_rate.above_band": 0.9721293199554069, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027870680044593088 }, { "epoch": 0.8818151595744681, "grad_norm": 330.46042255671426, "learning_rate": 1.8439993623381557e-07, "loss": 0.3594, "step": 5305, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.46347305389221555, "success_rate.epoch.env.math": 0.9103385178408051, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7732868280672959, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5073065145987941, "success_rate.epoch.global": 0.7138510384971557, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9927130044843049, "tokens_p.mean_below_band": 1.1641532182693481e-10, "tokens_p.mean_in_band": 0.8079427083333334, "tokens_rate.above_band": 0.9591397849462365, "tokens_rate.below_band": 0.002150537634408602, "tokens_rate.in_band": 0.03870967741935484 }, { "epoch": 0.8826462765957447, "grad_norm": 103.69438618589606, "learning_rate": 1.8437394111082537e-07, "loss": 0.4821, "step": 5310, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.46347305389221555, "success_rate.epoch.env.math": 0.9104204753199269, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7733798195242002, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5073224190475236, "success_rate.epoch.global": 0.7139645596403068, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9989837398373984, "tokens_p.mean_in_band": 0.6741477272727273, "tokens_rate.above_band": 0.9306431273644389, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06935687263556116 }, { "epoch": 0.8834773936170213, "grad_norm": 120.24352776037023, "learning_rate": 1.8434793822516123e-07, "loss": 0.4081, "step": 5315, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.46411483253588515, "success_rate.epoch.env.math": 0.9104204753199269, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7734727347273472, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5073892093972342, "success_rate.epoch.global": 0.7140779907468605, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9958105335157319, "tokens_p.mean_in_band": 0.5834375, "tokens_rate.above_band": 0.966931216931217, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03306878306878307 }, { "epoch": 0.8843085106382979, "grad_norm": 88.61726468216064, "learning_rate": 1.8432192762297193e-07, "loss": 0.4345, "step": 5320, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.4647550776583035, "success_rate.epoch.env.math": 0.9104204753199269, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7734070887113297, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5074414456796343, "success_rate.epoch.global": 0.7140969745012551, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933274021352313, "tokens_p.mean_in_band": 0.7868923611111112, "tokens_rate.above_band": 0.9689655172413794, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03103448275862069 }, { "epoch": 0.8851396276595744, "grad_norm": 229.29870823855558, "learning_rate": 1.8429590935041995e-07, "loss": 0.5412, "step": 5325, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.464839094159714, "success_rate.epoch.env.math": 0.9104204753199269, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.77329510546795, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5074389032485462, "success_rate.epoch.global": 0.7139838901360095, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.4047619047619048, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.994473102431835, "tokens_p.mean_in_band": 0.5250673491379311, "tokens_rate.above_band": 0.903462050599201, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09653794940079893 }, { "epoch": 0.885970744680851, "grad_norm": 254.57616947749648, "learning_rate": 1.8426988345368132e-07, "loss": 0.4033, "step": 5330, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.464839094159714, "success_rate.epoch.env.math": 0.9104204753199269, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7735270049099836, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5074599850160039, "success_rate.epoch.global": 0.714172604908947, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957757296466974, "tokens_p.mean_in_band": 0.660888671875, "tokens_rate.above_band": 0.9760119940029985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0239880059970015 }, { "epoch": 0.8868018617021277, "grad_norm": 408.7961185472496, "learning_rate": 1.8424384997894562e-07, "loss": 0.4684, "step": 5335, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.464922711058264, "success_rate.epoch.env.math": 0.9104204753199269, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7735733278789119, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5074717977312291, "success_rate.epoch.global": 0.714153805566548, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9999066467513069, "tokens_p.mean_in_band": 0.5439236111111111, "tokens_rate.above_band": 0.9674855491329479, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03251445086705202 }, { "epoch": 0.8876329787234043, "grad_norm": 58.46042877756656, "learning_rate": 1.8421780897241593e-07, "loss": 0.4678, "step": 5340, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.510989010989011, "success_rate.epoch.env.logic": 0.464922711058264, "success_rate.epoch.env.math": 0.9105022831050228, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.773758430410791, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5074960623054996, "success_rate.epoch.global": 0.7143422093329818, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9911723163841808, "tokens_p.mean_in_band": 0.8697916666666666, "tokens_rate.above_band": 0.9833333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016666666666666666 }, { "epoch": 0.8884640957446809, "grad_norm": 22.362169171649757, "learning_rate": 1.841917604803086e-07, "loss": 0.4257, "step": 5345, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.4643705463182898, "success_rate.epoch.env.math": 0.9105839416058394, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7738970588235294, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5072120470778674, "success_rate.epoch.global": 0.714304531085353, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9982525629077353, "tokens_p.mean_in_band": 0.6082763671875, "tokens_rate.above_band": 0.9710407239819004, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02895927601809955 }, { "epoch": 0.8892952127659575, "grad_norm": 53.66031052460184, "learning_rate": 1.8416570454885336e-07, "loss": 0.4381, "step": 5350, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.4643705463182898, "success_rate.epoch.env.math": 0.9105839416058394, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7739893834218048, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.507220440223165, "success_rate.epoch.global": 0.714379773505399, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9983974358974359, "tokens_p.mean_in_band": 0.3391170058139535, "tokens_rate.above_band": 0.8391019644527596, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16089803554724041 }, { "epoch": 0.890126329787234, "grad_norm": 57.13801330016607, "learning_rate": 1.8413964122429308e-07, "loss": 0.283, "step": 5355, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.4643705463182898, "success_rate.epoch.env.math": 0.9107468123861566, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7741277290348908, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5072478235316562, "success_rate.epoch.global": 0.7145677062771417, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9890939597315436, "tokens_p.mean_in_band": 0.8190104166666666, "tokens_rate.above_band": 0.9802631578947368, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019736842105263157 }, { "epoch": 0.8909574468085106, "grad_norm": 59.69692465871971, "learning_rate": 1.8411357055288385e-07, "loss": 0.474, "step": 5360, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.4643705463182898, "success_rate.epoch.env.math": 0.9107468123861566, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7742198653885376, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.507256199563806, "success_rate.epoch.global": 0.7146428101565584, "success_rate.window.env.logic": 0.25, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5416666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9994868035190616, "tokens_p.mean_in_band": 0.5507089120370371, "tokens_rate.above_band": 0.969300739056282, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030699260943718023 }, { "epoch": 0.8917885638297872, "grad_norm": 120.55733575071133, "learning_rate": 1.8408749258089462e-07, "loss": 0.3917, "step": 5365, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4659090909090909, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.4643705463182898, "success_rate.epoch.env.math": 0.910828025477707, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7743119266055046, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5072719517736712, "success_rate.epoch.global": 0.7147553918990005, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948770491803278, "tokens_p.mean_in_band": 0.72421875, "tokens_rate.above_band": 0.9838709677419355, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016129032258064516 }, { "epoch": 0.8926196808510638, "grad_norm": 56.88185408967301, "learning_rate": 1.8406140735460745e-07, "loss": 0.4158, "step": 5370, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4632768361581921, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5054347826086957, "success_rate.epoch.env.logic": 0.4643705463182898, "success_rate.epoch.env.math": 0.910828025477707, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7744039127776645, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5067899329298968, "success_rate.epoch.global": 0.7146424815983176, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9927288475565281, "tokens_p.mean_in_band": 0.6876346982758621, "tokens_rate.above_band": 0.9219905850706119, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07800941492938802 }, { "epoch": 0.8934507978723404, "grad_norm": 126.20126337988553, "learning_rate": 1.8403531492031723e-07, "loss": 0.415, "step": 5375, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4632768361581921, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5054347826086957, "success_rate.epoch.env.logic": 0.4643705463182898, "success_rate.epoch.env.math": 0.9109900090826522, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.7744958239967407, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5068130142775351, "success_rate.epoch.global": 0.7147924330005255, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9947996183206107, "tokens_p.mean_in_band": 0.6330180921052632, "tokens_rate.above_band": 0.9718100890207715, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028189910979228485 }, { "epoch": 0.894281914893617, "grad_norm": 54.55800904396644, "learning_rate": 1.8400921532433164e-07, "loss": 0.3074, "step": 5380, "success_rate.epoch.env.abd": 0.44680851063829785, "success_rate.epoch.env.agentgym:alfworld": 0.4632768361581921, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5054347826086957, "success_rate.epoch.env.logic": 0.4638196915776987, "success_rate.epoch.env.math": 0.9111514052583862, "success_rate.epoch.env.sat": 0.10793650793650794, "success_rate.epoch.env.science": 0.7744758803175249, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5070035411673789, "success_rate.epoch.global": 0.7147355990027555, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.575, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9920454545454546, "tokens_p.mean_in_band": 0.7213541666666666, "tokens_rate.above_band": 0.8870967741935484, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11290322580645161 }, { "epoch": 0.8951130319148937, "grad_norm": 67.10472061459373, "learning_rate": 1.8398310861297107e-07, "loss": 0.3511, "step": 5385, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4632768361581921, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5054347826086957, "success_rate.epoch.env.logic": 0.46445497630331756, "success_rate.epoch.env.math": 0.9112318840579711, "success_rate.epoch.env.sat": 0.10793650793650794, "success_rate.epoch.env.science": 0.7745676500508647, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5081246644358861, "success_rate.epoch.global": 0.7149226330972988, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996155753968254, "tokens_p.mean_in_band": 0.8640625, "tokens_rate.above_band": 0.9901768172888016, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009823182711198428 }, { "epoch": 0.8959441489361702, "grad_norm": 200.31084704830042, "learning_rate": 1.8395699483256853e-07, "loss": 0.4486, "step": 5390, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46629213483146065, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5054347826086957, "success_rate.epoch.env.logic": 0.4645390070921986, "success_rate.epoch.env.math": 0.9112318840579711, "success_rate.epoch.env.sat": 0.10793650793650794, "success_rate.epoch.env.science": 0.7746593451291438, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.508414757575925, "success_rate.epoch.global": 0.7149783776700301, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975945857795173, "tokens_p.mean_in_band": 0.6343245967741935, "tokens_rate.above_band": 0.961128526645768, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038871473354231974 }, { "epoch": 0.8967752659574468, "grad_norm": 69.37958639655824, "learning_rate": 1.8393087402946957e-07, "loss": 0.3711, "step": 5395, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46629213483146065, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5054347826086957, "success_rate.epoch.env.logic": 0.4645390070921986, "success_rate.epoch.env.math": 0.9112318840579711, "success_rate.epoch.env.sat": 0.11075949367088607, "success_rate.epoch.env.science": 0.7748425116846169, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5086880441477297, "success_rate.epoch.global": 0.715165007857517, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9898389570552147, "tokens_p.mean_in_band": 0.8142755681818182, "tokens_rate.above_band": 0.9367816091954023, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06321839080459771 }, { "epoch": 0.8976063829787234, "grad_norm": 203.84137353491911, "learning_rate": 1.8390474625003222e-07, "loss": 0.3331, "step": 5400, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46629213483146065, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5054347826086957, "success_rate.epoch.env.logic": 0.4645390070921986, "success_rate.epoch.env.math": 0.9112318840579711, "success_rate.epoch.env.sat": 0.11041009463722397, "success_rate.epoch.env.science": 0.7750253807106599, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.508672905056128, "success_rate.epoch.global": 0.7152205208742312, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.987145390070922, "tokens_p.mean_in_band": 0.7509068080357143, "tokens_rate.above_band": 0.834319526627219, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16568047337278108 }, { "epoch": 0.8984375, "grad_norm": 41.302546137968896, "learning_rate": 1.8387861154062692e-07, "loss": 0.4441, "step": 5405, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46629213483146065, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5027027027027027, "success_rate.epoch.env.logic": 0.4645390070921986, "success_rate.epoch.env.math": 0.9113924050632911, "success_rate.epoch.env.sat": 0.11006289308176101, "success_rate.epoch.env.science": 0.7750710515631344, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5084117150921587, "success_rate.epoch.global": 0.7151451739471619, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9844774590163935, "tokens_p.mean_in_band": 0.7504300458715596, "tokens_rate.above_band": 0.8484005563282336, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15159944367176634 }, { "epoch": 0.8992686170212766, "grad_norm": 198.3513399811832, "learning_rate": 1.8385246994763633e-07, "loss": 0.4245, "step": 5410, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46629213483146065, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5027027027027027, "success_rate.epoch.env.logic": 0.4651711924439197, "success_rate.epoch.env.math": 0.9113924050632911, "success_rate.epoch.env.sat": 0.11006289308176101, "success_rate.epoch.env.science": 0.7752991279659298, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5084899207062057, "success_rate.epoch.global": 0.7153685311029796, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99231843575419, "tokens_p.mean_in_band": 0.5631793478260869, "tokens_rate.above_band": 0.8861386138613861, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11386138613861387 }, { "epoch": 0.9000997340425532, "grad_norm": 98.05155018026738, "learning_rate": 1.838263215174554e-07, "loss": 0.3299, "step": 5415, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46629213483146065, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5027027027027027, "success_rate.epoch.env.logic": 0.4651711924439197, "success_rate.epoch.env.math": 0.9114724480578139, "success_rate.epoch.env.sat": 0.11006289308176101, "success_rate.epoch.env.science": 0.7754357519254155, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5085096177020246, "success_rate.epoch.global": 0.7155172413793104, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99140625, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.96, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04 }, { "epoch": 0.9009308510638298, "grad_norm": 67.32561951735258, "learning_rate": 1.838001662964912e-07, "loss": 0.2388, "step": 5420, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46629213483146065, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5027027027027027, "success_rate.epoch.env.logic": 0.4651711924439197, "success_rate.epoch.env.math": 0.9116321009918846, "success_rate.epoch.env.sat": 0.11006289308176101, "success_rate.epoch.env.science": 0.7756630896942701, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5085447986750178, "success_rate.epoch.global": 0.7157771107921179, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9918355855855856, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.9823008849557522, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017699115044247787 }, { "epoch": 0.9017619680851063, "grad_norm": 115.12481471270185, "learning_rate": 1.837740043311628e-07, "loss": 0.4129, "step": 5425, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46629213483146065, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5027027027027027, "success_rate.epoch.env.logic": 0.4651711924439197, "success_rate.epoch.env.math": 0.9108108108108108, "success_rate.epoch.env.sat": 0.11006289308176101, "success_rate.epoch.env.science": 0.7759805903760615, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5084989996296285, "success_rate.epoch.global": 0.715943162560292, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950657894736842, "tokens_p.mean_in_band": 0.6008112980769231, "tokens_rate.above_band": 0.868020304568528, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1319796954314721 }, { "epoch": 0.902593085106383, "grad_norm": 91.58198245798793, "learning_rate": 1.837478356679013e-07, "loss": 0.4245, "step": 5430, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46629213483146065, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4651711924439197, "success_rate.epoch.env.math": 0.9108108108108108, "success_rate.epoch.env.sat": 0.11006289308176101, "success_rate.epoch.env.science": 0.7761616161616162, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5082697562735242, "success_rate.epoch.global": 0.7159979155810318, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962605384824585, "tokens_p.mean_below_band": 3.9301812648773193e-07, "tokens_p.mean_in_band": 0.5061860068259386, "tokens_rate.above_band": 0.9259632334424578, "tokens_rate.below_band": 0.0002518257365902795, "tokens_rate.in_band": 0.0737849408209519 }, { "epoch": 0.9034242021276596, "grad_norm": 79.6995287275183, "learning_rate": 1.8372166035314968e-07, "loss": 0.2948, "step": 5435, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4692737430167598, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4651711924439197, "success_rate.epoch.env.math": 0.9109712230215827, "success_rate.epoch.env.sat": 0.11006289308176101, "success_rate.epoch.env.science": 0.7762068269036558, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.508559504558807, "success_rate.epoch.global": 0.7161458333333334, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974391805377721, "tokens_p.mean_below_band": 3.1650415621697903e-10, "tokens_p.mean_in_band": 0.6010044642857143, "tokens_rate.above_band": 0.9726027397260274, "tokens_rate.below_band": 0.0012453300124533001, "tokens_rate.in_band": 0.026151930261519303 }, { "epoch": 0.9042553191489362, "grad_norm": 162.34376600416132, "learning_rate": 1.8369547843336272e-07, "loss": 0.4836, "step": 5440, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4692737430167598, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4651711924439197, "success_rate.epoch.env.math": 0.9111310592459605, "success_rate.epoch.env.sat": 0.11006289308176101, "success_rate.epoch.env.science": 0.7760290556900726, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5085578741052429, "success_rate.epoch.global": 0.7161441394562248, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973880597014926, "tokens_p.mean_below_band": 1.3597309589385986e-07, "tokens_p.mean_in_band": 0.5050403225806451, "tokens_rate.above_band": 0.9544159544159544, "tokens_rate.below_band": 0.0014245014245014246, "tokens_rate.in_band": 0.04415954415954416 }, { "epoch": 0.9050864361702128, "grad_norm": 58.87048100575551, "learning_rate": 1.8366928995500685e-07, "loss": 0.3133, "step": 5445, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4692737430167598, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4658018867924528, "success_rate.epoch.env.math": 0.9112107623318386, "success_rate.epoch.env.sat": 0.11006289308176101, "success_rate.epoch.env.science": 0.7761645493042952, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5086347732914823, "success_rate.epoch.global": 0.7163286531461258, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942908653846154, "tokens_p.mean_in_band": 0.8380859375, "tokens_rate.above_band": 0.9765258215962441, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023474178403755867 }, { "epoch": 0.9059175531914894, "grad_norm": 76.48996585799078, "learning_rate": 1.8364309496456027e-07, "loss": 0.3129, "step": 5450, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4692737430167598, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4658018867924528, "success_rate.epoch.env.math": 0.9112903225806451, "success_rate.epoch.env.sat": 0.109717868338558, "success_rate.epoch.env.science": 0.7762547873412619, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.508618843613534, "success_rate.epoch.global": 0.7163461538461539, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9962628589798542, "tokens_p.mean_in_band": 0.671926738410596, "tokens_rate.above_band": 0.9392109500805152, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0607890499194847 }, { "epoch": 0.906748670212766, "grad_norm": 72.81418129946775, "learning_rate": 1.8361689350851266e-07, "loss": 0.2836, "step": 5455, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.4658018867924528, "success_rate.epoch.env.math": 0.9113697403760072, "success_rate.epoch.env.sat": 0.109717868338558, "success_rate.epoch.env.science": 0.7762998790810157, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.508898206226314, "success_rate.epoch.global": 0.7164566826860631, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9940124671916011, "tokens_p.mean_in_band": 0.8459821428571429, "tokens_rate.above_band": 0.9819587628865979, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01804123711340206 }, { "epoch": 0.9075797872340425, "grad_norm": 88.36304180432292, "learning_rate": 1.8359068563336514e-07, "loss": 0.5004, "step": 5460, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026737967914439, "success_rate.epoch.env.logic": 0.4658018867924528, "success_rate.epoch.env.math": 0.9115281501340483, "success_rate.epoch.env.sat": 0.109717868338558, "success_rate.epoch.env.science": 0.7764350453172205, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5091679673886493, "success_rate.epoch.global": 0.7166774821544452, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998471685971686, "tokens_p.mean_in_band": 0.7217881944444444, "tokens_rate.above_band": 0.9885496183206107, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011450381679389313 }, { "epoch": 0.9084109042553191, "grad_norm": 53.24133699265181, "learning_rate": 1.835644713856303e-07, "loss": 0.4388, "step": 5465, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026737967914439, "success_rate.epoch.env.logic": 0.4658018867924528, "success_rate.epoch.env.math": 0.911685994647636, "success_rate.epoch.env.sat": 0.109717868338558, "success_rate.epoch.env.science": 0.7766599597585513, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5092027636572783, "success_rate.epoch.global": 0.7169346473029046, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9878571428571429, "tokens_p.mean_in_band": 0.833984375, "tokens_rate.above_band": 0.9562841530054644, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04371584699453552 }, { "epoch": 0.9092420212765957, "grad_norm": 71.85357657503724, "learning_rate": 1.8353825081183202e-07, "loss": 0.4464, "step": 5470, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026737967914439, "success_rate.epoch.env.logic": 0.46588235294117647, "success_rate.epoch.env.math": 0.9117647058823529, "success_rate.epoch.env.sat": 0.109717868338558, "success_rate.epoch.env.science": 0.7767497988736927, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5092254015207858, "success_rate.epoch.global": 0.716988467020863, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962121212121212, "tokens_p.mean_in_band": 0.6067708333333334, "tokens_rate.above_band": 0.9635036496350365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0364963503649635 }, { "epoch": 0.9100731382978723, "grad_norm": 126.49033226744454, "learning_rate": 1.8351202395850538e-07, "loss": 0.4302, "step": 5475, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026737967914439, "success_rate.epoch.env.logic": 0.46650998824911866, "success_rate.epoch.env.math": 0.9118432769367765, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7768395657418576, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5092665928746015, "success_rate.epoch.global": 0.717042217042217, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9956290192926045, "tokens_p.mean_in_band": 0.6981026785714286, "tokens_rate.above_band": 0.9174041297935103, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08259587020648967 }, { "epoch": 0.910904255319149, "grad_norm": 135.66130541386818, "learning_rate": 1.8348579087219661e-07, "loss": 0.346, "step": 5480, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026737967914439, "success_rate.epoch.env.logic": 0.46650998824911866, "success_rate.epoch.env.math": 0.9119217081850534, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7769292604501608, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5092818770524724, "success_rate.epoch.global": 0.7171521035598706, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9857772435897436, "tokens_p.mean_in_band": 0.65703125, "tokens_rate.above_band": 0.9397590361445783, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.060240963855421686 }, { "epoch": 0.9117353723404256, "grad_norm": 219.08448873485182, "learning_rate": 1.8345955159946305e-07, "loss": 0.4447, "step": 5485, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026737967914439, "success_rate.epoch.env.logic": 0.4671361502347418, "success_rate.epoch.env.math": 0.912, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7771084337349398, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5093622067875041, "success_rate.epoch.global": 0.7173716207476394, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937193627450981, "tokens_p.mean_in_band": 0.5677734375, "tokens_rate.above_band": 0.9107142857142857, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08928571428571429 }, { "epoch": 0.9125664893617021, "grad_norm": 265.95525225476683, "learning_rate": 1.8343330618687294e-07, "loss": 0.5568, "step": 5490, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026737967914439, "success_rate.epoch.env.logic": 0.4671361502347418, "success_rate.epoch.env.math": 0.9120781527531083, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7772873194221509, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5093855739184423, "success_rate.epoch.global": 0.7175542916235781, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9904891304347826, "tokens_p.mean_in_band": 0.6, "tokens_rate.above_band": 0.9484536082474226, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05154639175257732 }, { "epoch": 0.9133976063829787, "grad_norm": 113.36668262783957, "learning_rate": 1.8340705468100547e-07, "loss": 0.3579, "step": 5495, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4722222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5053191489361702, "success_rate.epoch.env.logic": 0.4671361502347418, "success_rate.epoch.env.math": 0.9120781527531083, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7773547094188377, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5096321868403889, "success_rate.epoch.global": 0.7176804855998967, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963834269662921, "tokens_p.mean_in_band": 0.613037109375, "tokens_rate.above_band": 0.9910913140311804, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008908685968819599 }, { "epoch": 0.9142287234042553, "grad_norm": 91.15157953329543, "learning_rate": 1.833807971284506e-07, "loss": 0.3707, "step": 5500, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4696132596685083, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5053191489361702, "success_rate.epoch.env.logic": 0.4671361502347418, "success_rate.epoch.env.math": 0.9122340425531915, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7774439102564102, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5094172893934745, "success_rate.epoch.global": 0.717733608673206, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973529411764706, "tokens_p.mean_in_band": 0.6429036458333334, "tokens_rate.above_band": 0.9860788863109049, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013921113689095127 }, { "epoch": 0.9150598404255319, "grad_norm": 85.7865363983733, "learning_rate": 1.8335453357580912e-07, "loss": 0.3479, "step": 5505, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4696132596685083, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5053191489361702, "success_rate.epoch.env.logic": 0.46776084407971863, "success_rate.epoch.env.math": 0.9122340425531915, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7775330396475771, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5094821824149421, "success_rate.epoch.global": 0.7178428589859372, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9897260273972602, "tokens_p.mean_in_band": 0.7274305555555556, "tokens_rate.above_band": 0.9240506329113924, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0759493670886076 }, { "epoch": 0.9158909574468085, "grad_norm": 123.57800215684804, "learning_rate": 1.8332826406969235e-07, "loss": 0.301, "step": 5510, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.4696132596685083, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5079365079365079, "success_rate.epoch.env.logic": 0.4672131147540984, "success_rate.epoch.env.math": 0.912311780336581, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7777555511102221, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5096976259531923, "success_rate.epoch.global": 0.7180048975383426, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980396065922382, "tokens_p.mean_in_band": 0.6572779605263158, "tokens_rate.above_band": 0.99, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01 }, { "epoch": 0.9167220744680851, "grad_norm": 90.49517374831306, "learning_rate": 1.8330198865672226e-07, "loss": 0.418, "step": 5515, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46703296703296704, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5079365079365079, "success_rate.epoch.env.logic": 0.4672131147540984, "success_rate.epoch.env.math": 0.912311780336581, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7779776179056754, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5094832417859115, "success_rate.epoch.global": 0.71809401159047, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965277777777778, "tokens_p.mean_in_band": 0.6599392361111112, "tokens_rate.above_band": 0.9896907216494846, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010309278350515464 }, { "epoch": 0.9175531914893617, "grad_norm": 67.32676576008845, "learning_rate": 1.8327570738353125e-07, "loss": 0.5005, "step": 5520, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46703296703296704, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5079365079365079, "success_rate.epoch.env.logic": 0.4666666666666667, "success_rate.epoch.env.math": 0.9124668435013262, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7778665601278466, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5094375651767739, "success_rate.epoch.global": 0.718018018018018, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.4375, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9976081530782029, "tokens_p.mean_in_band": 0.5864415322580645, "tokens_rate.above_band": 0.9509493670886076, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0490506329113924 }, { "epoch": 0.9183843085106383, "grad_norm": 149.87406036329205, "learning_rate": 1.8324942029676214e-07, "loss": 0.4205, "step": 5525, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46703296703296704, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5079365079365079, "success_rate.epoch.env.logic": 0.4666666666666667, "success_rate.epoch.env.math": 0.9125441696113075, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7780439121756487, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5094607177365723, "success_rate.epoch.global": 0.7181993569131833, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9855769230769231, "tokens_p.mean_in_band": 0.8412642045454546, "tokens_rate.above_band": 0.9285714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07142857142857142 }, { "epoch": 0.9192154255319149, "grad_norm": 67.83569970990335, "learning_rate": 1.8322312744306807e-07, "loss": 0.7035, "step": 5530, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46703296703296704, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5079365079365079, "success_rate.epoch.env.logic": 0.4672897196261682, "success_rate.epoch.env.math": 0.912621359223301, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7780215396888711, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5095223422897285, "success_rate.epoch.global": 0.7182881377714947, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5599999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.986, "tokens_p.mean_below_band": 2.88089116414388e-07, "tokens_p.mean_in_band": 0.10345790952890792, "tokens_rate.above_band": 0.13791835233541744, "tokens_rate.below_band": 0.0033100404560500183, "tokens_rate.in_band": 0.8587716072085325 }, { "epoch": 0.9200465425531915, "grad_norm": 149.39185118653322, "learning_rate": 1.8319682886911243e-07, "loss": 0.4134, "step": 5535, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46703296703296704, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052631578947369, "success_rate.epoch.env.logic": 0.4679113185530922, "success_rate.epoch.env.math": 0.9126984126984127, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7780215396888711, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5093428243224798, "success_rate.epoch.global": 0.718268242548818, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.4, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9940915300546448, "tokens_p.mean_in_band": 0.6588635341726619, "tokens_rate.above_band": 0.9294057897409853, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07059421025901473 }, { "epoch": 0.9208776595744681, "grad_norm": 122.91575502138198, "learning_rate": 1.8317052462156875e-07, "loss": 0.5052, "step": 5540, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46703296703296704, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052631578947369, "success_rate.epoch.env.logic": 0.4679113185530922, "success_rate.epoch.env.math": 0.9127753303964757, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7779549531592586, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5093437635196116, "success_rate.epoch.global": 0.7182845403184386, "success_rate.window.env.abd": 0.5, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.998284869083585, "tokens_p.mean_below_band": 7.188646122813225e-09, "tokens_p.mean_in_band": 0.15450613839285715, "tokens_rate.above_band": 0.8870031263957123, "tokens_rate.below_band": 0.00044662795891022776, "tokens_rate.in_band": 0.1125502456453774 }, { "epoch": 0.9217087765957447, "grad_norm": 45.88892155646569, "learning_rate": 1.8314421474712056e-07, "loss": 0.3265, "step": 5545, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46703296703296704, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052631578947369, "success_rate.epoch.env.logic": 0.4679113185530922, "success_rate.epoch.env.math": 0.9128521126760564, "success_rate.epoch.env.sat": 0.109375, "success_rate.epoch.env.science": 0.7779326827325234, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5093487191425976, "success_rate.epoch.global": 0.7183369690748107, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9885519801980198, "tokens_p.mean_in_band": 0.576171875, "tokens_rate.above_band": 0.8416666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15833333333333333 }, { "epoch": 0.9225398936170213, "grad_norm": 67.16419115377029, "learning_rate": 1.8311789929246154e-07, "loss": 0.3728, "step": 5550, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46703296703296704, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052631578947369, "success_rate.epoch.env.logic": 0.46853146853146854, "success_rate.epoch.env.math": 0.9120492524186455, "success_rate.epoch.env.sat": 0.10903426791277258, "success_rate.epoch.env.science": 0.7780211029265379, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5093091716723933, "success_rate.epoch.global": 0.718261092587843, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952098540145985, "tokens_p.mean_in_band": 0.6761067708333334, "tokens_rate.above_band": 0.9448275862068966, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05517241379310345 }, { "epoch": 0.9233710106382979, "grad_norm": 52.69756643980287, "learning_rate": 1.8309157830429512e-07, "loss": 0.3466, "step": 5555, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052631578947369, "success_rate.epoch.env.logic": 0.46853146853146854, "success_rate.epoch.env.math": 0.9121265377855887, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.7781536012733784, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5095622222299582, "success_rate.epoch.global": 0.7183495643259866, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976954277286135, "tokens_p.mean_in_band": 0.6669921875, "tokens_rate.above_band": 0.904, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.096 }, { "epoch": 0.9242021276595744, "grad_norm": 68.73886400895024, "learning_rate": 1.8306525182933458e-07, "loss": 0.4179, "step": 5560, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052631578947369, "success_rate.epoch.env.logic": 0.4691501746216531, "success_rate.epoch.env.math": 0.9121265377855887, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.7782859415390734, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5096304991714017, "success_rate.epoch.global": 0.7184938524590164, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9933873720136519, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.9543973941368078, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04560260586319218 }, { "epoch": 0.925033244680851, "grad_norm": 94.56886753915559, "learning_rate": 1.8303891991430303e-07, "loss": 0.2386, "step": 5565, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052631578947369, "success_rate.epoch.env.logic": 0.4697674418604651, "success_rate.epoch.env.math": 0.9121265377855887, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.778594122319301, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5097146308094963, "success_rate.epoch.global": 0.7187819856704196, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954111600587372, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9742489270386266, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02575107296137339 }, { "epoch": 0.9258643617021277, "grad_norm": 86.2283807168584, "learning_rate": 1.830125826059331e-07, "loss": 0.4311, "step": 5570, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026178010471204, "success_rate.epoch.env.logic": 0.4697674418604651, "success_rate.epoch.env.math": 0.9121265377855887, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.7787259376860488, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5094861270385083, "success_rate.epoch.global": 0.7187979539641943, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9932541824069078, "tokens_p.mean_below_band": 3.2782554626464844e-07, "tokens_p.mean_in_band": 0.4228376400203666, "tokens_rate.above_band": 0.7900234491579621, "tokens_rate.below_band": 0.0006395224898742272, "tokens_rate.in_band": 0.20933702835216372 }, { "epoch": 0.9266954787234043, "grad_norm": 60.439870970998264, "learning_rate": 1.8298623995096713e-07, "loss": 0.3498, "step": 5575, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026178010471204, "success_rate.epoch.env.logic": 0.4697674418604651, "success_rate.epoch.env.math": 0.9121265377855887, "success_rate.epoch.env.sat": 0.10835913312693499, "success_rate.epoch.env.science": 0.7789452815226011, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5094754747466513, "success_rate.epoch.global": 0.7188857653973934, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.4375, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9926394628099173, "tokens_p.mean_in_band": 0.6589133522727273, "tokens_rate.above_band": 0.8461538461538461, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15384615384615385 }, { "epoch": 0.9275265957446809, "grad_norm": 118.51258004537772, "learning_rate": 1.8295989199615682e-07, "loss": 0.4497, "step": 5580, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5026178010471204, "success_rate.epoch.env.logic": 0.4697674418604651, "success_rate.epoch.env.math": 0.9122036874451273, "success_rate.epoch.env.sat": 0.10835913312693499, "success_rate.epoch.env.science": 0.7790766792153755, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5094944335968616, "success_rate.epoch.global": 0.719029374201788, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9930921052631579, "tokens_p.mean_in_band": 0.8177083333333334, "tokens_rate.above_band": 0.9693877551020408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030612244897959183 }, { "epoch": 0.9283577127659575, "grad_norm": 82.31978805853106, "learning_rate": 1.8293353878826337e-07, "loss": 0.344, "step": 5585, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052083333333334, "success_rate.epoch.env.logic": 0.4697674418604651, "success_rate.epoch.env.math": 0.9122036874451273, "success_rate.epoch.env.sat": 0.10835913312693499, "success_rate.epoch.env.science": 0.7792079207920792, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5097418675843994, "success_rate.epoch.global": 0.7191728363543528, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949596774193549, "tokens_p.mean_in_band": 0.6793094758064516, "tokens_rate.above_band": 0.9583333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041666666666666664 }, { "epoch": 0.929188829787234, "grad_norm": 112.32913906802416, "learning_rate": 1.8290718037405727e-07, "loss": 0.376, "step": 5590, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052083333333334, "success_rate.epoch.env.logic": 0.4697674418604651, "success_rate.epoch.env.math": 0.9122036874451273, "success_rate.epoch.env.sat": 0.10835913312693499, "success_rate.epoch.env.science": 0.7790973871733967, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5097318190736102, "success_rate.epoch.global": 0.7191168963757019, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5599999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9989247311827957, "tokens_p.mean_in_band": 0.5826171875, "tokens_rate.above_band": 0.9587628865979382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041237113402061855 }, { "epoch": 0.9300199468085106, "grad_norm": 210.56060122057153, "learning_rate": 1.8288081680031818e-07, "loss": 0.5236, "step": 5595, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052083333333334, "success_rate.epoch.env.logic": 0.4697674418604651, "success_rate.epoch.env.math": 0.9122807017543859, "success_rate.epoch.env.sat": 0.10835913312693499, "success_rate.epoch.env.science": 0.7792721518987342, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5097547080767553, "success_rate.epoch.global": 0.7192960081622242, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9927685950413223, "tokens_p.mean_in_band": 0.6494391025641025, "tokens_rate.above_band": 0.8612099644128114, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1387900355871886 }, { "epoch": 0.9308510638297872, "grad_norm": 123.92616469593366, "learning_rate": 1.8285444811383508e-07, "loss": 0.3939, "step": 5600, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052083333333334, "success_rate.epoch.env.logic": 0.4692218350754936, "success_rate.epoch.env.math": 0.9122807017543859, "success_rate.epoch.env.sat": 0.10802469135802469, "success_rate.epoch.env.science": 0.7794030440798577, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.509686602951959, "success_rate.epoch.global": 0.7192199847055825, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986538461538461, "tokens_p.mean_in_band": 0.4475911458333333, "tokens_rate.above_band": 0.9312320916905444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06876790830945559 }, { "epoch": 0.9316821808510638, "grad_norm": 192.43246560782973, "learning_rate": 1.8282807436140588e-07, "loss": 0.5641, "step": 5605, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052083333333334, "success_rate.epoch.env.logic": 0.4692218350754936, "success_rate.epoch.env.math": 0.9115586690017513, "success_rate.epoch.env.sat": 0.10802469135802469, "success_rate.epoch.env.science": 0.7794466403162056, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5096249269050239, "success_rate.epoch.global": 0.719199898076188, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983501552795031, "tokens_p.mean_in_band": 0.4619732481060606, "tokens_rate.above_band": 0.9512555391432792, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04874446085672083 }, { "epoch": 0.9325132978723404, "grad_norm": 66.30292936547168, "learning_rate": 1.8280169558983754e-07, "loss": 0.4176, "step": 5610, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5052083333333334, "success_rate.epoch.env.logic": 0.4692218350754936, "success_rate.epoch.env.math": 0.9115586690017513, "success_rate.epoch.env.sat": 0.10802469135802469, "success_rate.epoch.env.science": 0.7796208530805687, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5096407644290569, "success_rate.epoch.global": 0.7193429262702152, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9873511904761905, "tokens_p.mean_in_band": 0.8818359375, "tokens_rate.above_band": 0.9692307692307692, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03076923076923077 }, { "epoch": 0.933344414893617, "grad_norm": 64.2818990011968, "learning_rate": 1.827753118459459e-07, "loss": 0.3374, "step": 5615, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5077720207253886, "success_rate.epoch.env.logic": 0.4692218350754936, "success_rate.epoch.env.math": 0.9115586690017513, "success_rate.epoch.env.sat": 0.10802469135802469, "success_rate.epoch.env.science": 0.7798382323929769, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5098935886749172, "success_rate.epoch.global": 0.7195571955719557, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.5616319444444444, "tokens_rate.above_band": 0.9591836734693877, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04081632653061224 }, { "epoch": 0.9341755319148937, "grad_norm": 97.80506391486851, "learning_rate": 1.8274892317655566e-07, "loss": 0.5687, "step": 5620, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5077720207253886, "success_rate.epoch.env.logic": 0.4692218350754936, "success_rate.epoch.env.math": 0.9116360454943132, "success_rate.epoch.env.sat": 0.10802469135802469, "success_rate.epoch.env.science": 0.7800118273211117, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.509916404258617, "success_rate.epoch.global": 0.719735503560529, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954681588902901, "tokens_p.mean_in_band": 0.6418779481132075, "tokens_rate.above_band": 0.9373522458628841, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06264775413711583 }, { "epoch": 0.9350066489361702, "grad_norm": 295.8926035675023, "learning_rate": 1.8272252962850022e-07, "loss": 0.419, "step": 5625, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5077720207253886, "success_rate.epoch.env.logic": 0.46983758700696054, "success_rate.epoch.env.math": 0.9117132867132867, "success_rate.epoch.env.sat": 0.1076923076923077, "success_rate.epoch.env.science": 0.7800551832873472, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5099531283905223, "success_rate.epoch.global": 0.7197508896797153, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9952713815789473, "tokens_p.mean_in_band": 0.6477430555555556, "tokens_rate.above_band": 0.8710601719197708, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12893982808022922 }, { "epoch": 0.9358377659574468, "grad_norm": 69.1298632991204, "learning_rate": 1.826961312486217e-07, "loss": 0.2545, "step": 5630, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5077720207253886, "success_rate.epoch.env.logic": 0.4704519119351101, "success_rate.epoch.env.math": 0.9117903930131004, "success_rate.epoch.env.sat": 0.1076923076923077, "success_rate.epoch.env.science": 0.780185148709868, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5100278008132936, "success_rate.epoch.global": 0.7199288708243363, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992688679245283, "tokens_p.mean_in_band": 0.697265625, "tokens_rate.above_band": 0.9430604982206405, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05693950177935943 }, { "epoch": 0.9366688829787234, "grad_norm": 134.50068873257337, "learning_rate": 1.8266972808377074e-07, "loss": 0.407, "step": 5635, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5077720207253886, "success_rate.epoch.env.logic": 0.4710648148148148, "success_rate.epoch.env.math": 0.9117903930131004, "success_rate.epoch.env.sat": 0.10736196319018405, "success_rate.epoch.env.science": 0.7803149606299212, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5100652890221694, "success_rate.epoch.global": 0.7199796902767199, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9956234643734644, "tokens_p.mean_in_band": 0.5833834134615384, "tokens_rate.above_band": 0.9399538106235565, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06004618937644342 }, { "epoch": 0.9375, "grad_norm": 89.89322297515757, "learning_rate": 1.826433201808065e-07, "loss": 0.4559, "step": 5640, "success_rate.epoch.env.abd": 0.4583333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5077720207253886, "success_rate.epoch.env.logic": 0.4710648148148148, "success_rate.epoch.env.math": 0.9117903930131004, "success_rate.epoch.env.sat": 0.10736196319018405, "success_rate.epoch.env.science": 0.7804878048780488, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5100810021356356, "success_rate.epoch.global": 0.7201217964983506, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9922480620155039, "tokens_p.mean_in_band": 0.544140625, "tokens_rate.above_band": 0.8657718120805369, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1342281879194631 }, { "epoch": 0.9383311170212766, "grad_norm": 140.61777539299692, "learning_rate": 1.826169075865965e-07, "loss": 0.471, "step": 5645, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.47167630057803467, "success_rate.epoch.env.math": 0.9117903930131004, "success_rate.epoch.env.sat": 0.10703363914373089, "success_rate.epoch.env.science": 0.7804638364779874, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5108715688206612, "success_rate.epoch.global": 0.7200253485424588, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6599999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9881500987491771, "tokens_p.mean_in_band": 0.6969708136792453, "tokens_rate.above_band": 0.877527440785673, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12247255921432698 }, { "epoch": 0.9391622340425532, "grad_norm": 155.31347010657367, "learning_rate": 1.8259049034801664e-07, "loss": 0.4401, "step": 5650, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.47167630057803467, "success_rate.epoch.env.math": 0.9117903930131004, "success_rate.epoch.env.sat": 0.10703363914373089, "success_rate.epoch.env.science": 0.7807224185316058, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.510895076280081, "success_rate.epoch.global": 0.7202380952380952, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937043795620438, "tokens_p.mean_below_band": 2.2351741790771484e-07, "tokens_p.mean_in_band": 0.5067377253320683, "tokens_rate.above_band": 0.8664305590690615, "tokens_rate.below_band": 0.00025297242600556537, "tokens_rate.in_band": 0.13331646850493295 }, { "epoch": 0.9399933510638298, "grad_norm": 153.811615759648, "learning_rate": 1.8256406851195106e-07, "loss": 0.3535, "step": 5655, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.47167630057803467, "success_rate.epoch.env.math": 0.9117903930131004, "success_rate.epoch.env.sat": 0.10670731707317073, "success_rate.epoch.env.science": 0.7808084772370487, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5108732341559795, "success_rate.epoch.global": 0.7202177490821623, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9875553097345132, "tokens_p.mean_in_band": 0.7020596590909091, "tokens_rate.above_band": 0.9112903225806451, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08870967741935484 }, { "epoch": 0.9408244680851063, "grad_norm": 33.00768094321201, "learning_rate": 1.8253764212529204e-07, "loss": 0.5389, "step": 5660, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.47167630057803467, "success_rate.epoch.env.math": 0.9118673647469459, "success_rate.epoch.env.sat": 0.10670731707317073, "success_rate.epoch.env.science": 0.7808944684189878, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5108880489665054, "success_rate.epoch.global": 0.7203239686155404, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971804511278195, "tokens_p.mean_in_band": 0.6077127659574468, "tokens_rate.above_band": 0.9396274887604368, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06037251123956326 }, { "epoch": 0.941655585106383, "grad_norm": 72.33420528706179, "learning_rate": 1.8251121123493992e-07, "loss": 0.3276, "step": 5665, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.47167630057803467, "success_rate.epoch.env.math": 0.9118673647469459, "success_rate.epoch.env.sat": 0.10670731707317073, "success_rate.epoch.env.science": 0.7809803921568628, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5108958602154031, "success_rate.epoch.global": 0.7203947368421053, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.989939024390244, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9951456310679612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0048543689320388345 }, { "epoch": 0.9424867021276596, "grad_norm": 78.54609457190823, "learning_rate": 1.8248477588780312e-07, "loss": 0.3592, "step": 5670, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.4722863741339492, "success_rate.epoch.env.math": 0.912020905923345, "success_rate.epoch.env.sat": 0.10670731707317073, "success_rate.epoch.env.science": 0.7811091514795219, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5109769851294915, "success_rate.epoch.global": 0.7206068268015171, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921116504854369, "tokens_p.mean_below_band": 9.255018085241318e-09, "tokens_p.mean_in_band": 0.871875, "tokens_rate.above_band": 0.9716981132075472, "tokens_rate.below_band": 0.0047169811320754715, "tokens_rate.in_band": 0.02358490566037736 }, { "epoch": 0.9433178191489362, "grad_norm": 91.49992006573414, "learning_rate": 1.8245833613079783e-07, "loss": 0.3057, "step": 5675, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.4722863741339492, "success_rate.epoch.env.math": 0.9113043478260869, "success_rate.epoch.env.sat": 0.10670731707317073, "success_rate.epoch.env.science": 0.7810419114766941, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5109057307567565, "success_rate.epoch.global": 0.7205306380290587, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9941432968536251, "tokens_p.mean_below_band": 4.3655745685100555e-09, "tokens_p.mean_in_band": 0.6121002906976745, "tokens_rate.above_band": 0.9432258064516129, "tokens_rate.below_band": 0.0012903225806451613, "tokens_rate.in_band": 0.05548387096774193 }, { "epoch": 0.9441489361702128, "grad_norm": 114.36942567723449, "learning_rate": 1.824318920108482e-07, "loss": 0.3578, "step": 5680, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.4722863741339492, "success_rate.epoch.env.math": 0.9113814074717637, "success_rate.epoch.env.sat": 0.10670731707317073, "success_rate.epoch.env.science": 0.7812133072407045, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5109283176121826, "success_rate.epoch.global": 0.7207070707070707, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996666666666667, "tokens_p.mean_in_band": 0.5948840725806451, "tokens_rate.above_band": 0.9603072983354674, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03969270166453265 }, { "epoch": 0.9449800531914894, "grad_norm": 112.42782695550275, "learning_rate": 1.8240544357488602e-07, "loss": 0.4535, "step": 5685, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.4728950403690888, "success_rate.epoch.env.math": 0.9113814074717637, "success_rate.epoch.env.sat": 0.10670731707317073, "success_rate.epoch.env.science": 0.7812989045383412, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5109914324787986, "success_rate.epoch.global": 0.7208128234254702, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970472440944882, "tokens_p.mean_in_band": 0.6451981707317073, "tokens_rate.above_band": 0.9559139784946237, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04408602150537634 }, { "epoch": 0.945811170212766, "grad_norm": 86.37560047605557, "learning_rate": 1.8237899086985078e-07, "loss": 0.433, "step": 5690, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.46994535519125685, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.4728950403690888, "success_rate.epoch.env.math": 0.9113814074717637, "success_rate.epoch.env.sat": 0.10638297872340426, "success_rate.epoch.env.science": 0.7815126050420168, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5109813744927902, "success_rate.epoch.global": 0.7208979694791272, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961890243902439, "tokens_p.mean_in_band": 0.6688988095238095, "tokens_rate.above_band": 0.9398280802292264, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06017191977077364 }, { "epoch": 0.9466422872340425, "grad_norm": 166.9032547031139, "learning_rate": 1.8235253394268961e-07, "loss": 0.3494, "step": 5695, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.4728950403690888, "success_rate.epoch.env.math": 0.9114583333333334, "success_rate.epoch.env.sat": 0.10638297872340426, "success_rate.epoch.env.science": 0.781597968353194, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5112580127598823, "success_rate.epoch.global": 0.7210386991050044, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946369636963697, "tokens_p.mean_in_band": 0.6320082720588235, "tokens_rate.above_band": 0.946875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053125 }, { "epoch": 0.9474734042553191, "grad_norm": 53.286843872894984, "learning_rate": 1.8232607284035701e-07, "loss": 0.4844, "step": 5700, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.3684210526315789, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5051546391752577, "success_rate.epoch.env.logic": 0.4728950403690888, "success_rate.epoch.env.math": 0.9114583333333334, "success_rate.epoch.env.sat": 0.10606060606060606, "success_rate.epoch.env.science": 0.7816832649873071, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5112364603936381, "success_rate.epoch.global": 0.7210181451612904, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9894340974212035, "tokens_p.mean_in_band": 0.7130408653846154, "tokens_rate.above_band": 0.9306666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06933333333333333 }, { "epoch": 0.9483045212765957, "grad_norm": 55.37192669990385, "learning_rate": 1.82299607609815e-07, "loss": 0.3795, "step": 5705, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.4728950403690888, "success_rate.epoch.env.math": 0.9114583333333334, "success_rate.epoch.env.sat": 0.10606060606060606, "success_rate.epoch.env.science": 0.7818536585365854, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5143534612513426, "success_rate.epoch.global": 0.7212289095945605, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949742759146342, "tokens_p.mean_in_band": 0.7826286764705882, "tokens_rate.above_band": 0.9872084273890143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012791572610985704 }, { "epoch": 0.9491356382978723, "grad_norm": 61.4604155222677, "learning_rate": 1.8227313829803286e-07, "loss": 0.2709, "step": 5710, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.4728950403690888, "success_rate.epoch.env.math": 0.9116117850953206, "success_rate.epoch.env.sat": 0.10606060606060606, "success_rate.epoch.env.science": 0.7821087507308517, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.514390601611002, "success_rate.epoch.global": 0.7215094339622642, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9898148148148148, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9926470588235294, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007352941176470588 }, { "epoch": 0.949966755319149, "grad_norm": 56.37754010095084, "learning_rate": 1.822466649519871e-07, "loss": 0.4267, "step": 5715, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5102040816326531, "success_rate.epoch.env.logic": 0.4728950403690888, "success_rate.epoch.env.math": 0.9116117850953206, "success_rate.epoch.env.sat": 0.10574018126888217, "success_rate.epoch.env.science": 0.7820837390457643, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5145875413804143, "success_rate.epoch.global": 0.7214680744092509, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9949751148545176, "tokens_p.mean_in_band": 0.6371626420454546, "tokens_rate.above_band": 0.9674074074074074, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03259259259259259 }, { "epoch": 0.9507978723404256, "grad_norm": 69.17142813485735, "learning_rate": 1.822201876186615e-07, "loss": 0.3764, "step": 5720, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5102040816326531, "success_rate.epoch.env.logic": 0.4728950403690888, "success_rate.epoch.env.math": 0.9116117850953206, "success_rate.epoch.env.sat": 0.10574018126888217, "success_rate.epoch.env.science": 0.782295719844358, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5146068123621046, "success_rate.epoch.global": 0.7216430096721518, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9993594436310396, "tokens_p.mean_in_band": 0.5391049592391305, "tokens_rate.above_band": 0.9368998628257887, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06310013717421124 }, { "epoch": 0.9516289893617021, "grad_norm": 148.37458987188273, "learning_rate": 1.8219370634504677e-07, "loss": 0.4433, "step": 5725, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5102040816326531, "success_rate.epoch.env.logic": 0.4728950403690888, "success_rate.epoch.env.math": 0.9116883116883117, "success_rate.epoch.env.sat": 0.10574018126888217, "success_rate.epoch.env.science": 0.7823803967327888, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.514621467224052, "success_rate.epoch.global": 0.7217478653942743, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995954003407155, "tokens_p.mean_in_band": 0.6315104166666666, "tokens_rate.above_band": 0.9949152542372881, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005084745762711864 }, { "epoch": 0.9524601063829787, "grad_norm": 108.27536602654064, "learning_rate": 1.8216722117814074e-07, "loss": 0.3811, "step": 5730, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5102040816326531, "success_rate.epoch.env.logic": 0.4735023041474654, "success_rate.epoch.env.math": 0.9117647058823529, "success_rate.epoch.env.sat": 0.10574018126888217, "success_rate.epoch.env.science": 0.7825072886297376, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5146951535758126, "success_rate.epoch.global": 0.7219224494917806, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9861680327868853, "tokens_p.mean_in_band": 0.8127170138888888, "tokens_rate.above_band": 0.9104477611940298, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08955223880597014 }, { "epoch": 0.9532912234042553, "grad_norm": 164.29866779636046, "learning_rate": 1.8214073216494804e-07, "loss": 0.3765, "step": 5735, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5102040816326531, "success_rate.epoch.env.logic": 0.47295742232451093, "success_rate.epoch.env.math": 0.9118409680207433, "success_rate.epoch.env.sat": 0.10574018126888217, "success_rate.epoch.env.science": 0.7825918010491548, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5146602347335265, "success_rate.epoch.global": 0.7219365358083532, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99839514978602, "tokens_p.mean_in_band": 0.6144153225806451, "tokens_rate.above_band": 0.9576502732240437, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04234972677595628 }, { "epoch": 0.9541223404255319, "grad_norm": 95.65806487206133, "learning_rate": 1.8211423935248022e-07, "loss": 0.4464, "step": 5740, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47282608695652173, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5102040816326531, "success_rate.epoch.env.logic": 0.47295742232451093, "success_rate.epoch.env.math": 0.9119170984455959, "success_rate.epoch.env.sat": 0.10574018126888217, "success_rate.epoch.env.science": 0.7824820353466693, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5146571769810145, "success_rate.epoch.global": 0.7219157472417251, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.978150406504065, "tokens_p.mean_in_band": 0.7730034722222222, "tokens_rate.above_band": 0.9318181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06818181818181818 }, { "epoch": 0.9549534574468085, "grad_norm": 114.87790922893703, "learning_rate": 1.820877427877555e-07, "loss": 0.3373, "step": 5745, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5102040816326531, "success_rate.epoch.env.logic": 0.4735632183908046, "success_rate.epoch.env.math": 0.911993097497843, "success_rate.epoch.env.sat": 0.10574018126888217, "success_rate.epoch.env.science": 0.782608695652174, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5144983259571776, "success_rate.epoch.global": 0.7219994988724631, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983175473579262, "tokens_p.mean_in_band": 0.68583984375, "tokens_rate.above_band": 0.9804496578690127, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019550342130987292 }, { "epoch": 0.9557845744680851, "grad_norm": 85.4252376742887, "learning_rate": 1.8206124251779882e-07, "loss": 0.4428, "step": 5750, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4730195177956372, "success_rate.epoch.env.math": 0.9120689655172414, "success_rate.epoch.env.sat": 0.10574018126888217, "success_rate.epoch.env.science": 0.7826508829807879, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5146856558162813, "success_rate.epoch.global": 0.7220135236664162, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.72, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9942100650976464, "tokens_p.mean_in_band": 0.6355902777777778, "tokens_rate.above_band": 0.9779627815866797, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022037218413320275 }, { "epoch": 0.9566156914893617, "grad_norm": 195.25862102688197, "learning_rate": 1.8203473858964164e-07, "loss": 0.2643, "step": 5755, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4730195177956372, "success_rate.epoch.env.math": 0.9121447028423773, "success_rate.epoch.env.sat": 0.10542168674698796, "success_rate.epoch.env.science": 0.7827352085354026, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5146712529397228, "success_rate.epoch.global": 0.7220275344180225, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9887724550898204, "tokens_p.mean_in_band": 0.6139322916666666, "tokens_rate.above_band": 0.9027027027027027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0972972972972973 }, { "epoch": 0.9574468085106383, "grad_norm": 97.77268051467541, "learning_rate": 1.8200823105032195e-07, "loss": 0.3047, "step": 5760, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4730195177956372, "success_rate.epoch.env.math": 0.9122203098106713, "success_rate.epoch.env.sat": 0.10542168674698796, "success_rate.epoch.env.science": 0.7828194686833431, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5146857863139259, "success_rate.epoch.global": 0.7221318653822094, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9979047788873039, "tokens_p.mean_in_band": 0.5461128048780488, "tokens_rate.above_band": 0.944743935309973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05525606469002695 }, { "epoch": 0.9582779255319149, "grad_norm": 70.86647272407083, "learning_rate": 1.8198171994688414e-07, "loss": 0.5016, "step": 5765, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4730195177956372, "success_rate.epoch.env.math": 0.9122203098106713, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7829036635006784, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5146646602389671, "success_rate.epoch.global": 0.7221110555277639, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998405612244898, "tokens_p.mean_in_band": 0.5956029647435898, "tokens_rate.above_band": 0.9617271835132483, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038272816486751716 }, { "epoch": 0.9591090425531915, "grad_norm": 99.77237951397949, "learning_rate": 1.8195520532637894e-07, "loss": 0.3893, "step": 5770, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4724770642201835, "success_rate.epoch.env.math": 0.9122203098106713, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7830718574472206, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5146306366363388, "success_rate.epoch.global": 0.7221597300337458, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9999204834605598, "tokens_p.mean_in_band": 0.5905230978260869, "tokens_rate.above_band": 0.980865224625624, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01913477537437604 }, { "epoch": 0.9599401595744681, "grad_norm": 87.48859439512039, "learning_rate": 1.8192868723586328e-07, "loss": 0.3305, "step": 5775, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47368421052631576, "success_rate.epoch.env.math": 0.9122203098106713, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.783239790981227, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5147556438945332, "success_rate.epoch.global": 0.7223679280629449, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9914772727272727, "tokens_p.mean_in_band": 0.79296875, "tokens_rate.above_band": 0.9892086330935251, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01079136690647482 }, { "epoch": 0.9607712765957447, "grad_norm": 88.92169842872373, "learning_rate": 1.8190216572240028e-07, "loss": 0.4432, "step": 5780, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47368421052631576, "success_rate.epoch.env.math": 0.9122957867583835, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.783323660282453, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.514770129917164, "success_rate.epoch.global": 0.7224719101123596, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9895341981132075, "tokens_p.mean_in_band": 0.7091346153846154, "tokens_rate.above_band": 0.9422222222222222, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.057777777777777775 }, { "epoch": 0.9616023936170213, "grad_norm": 73.74395543092918, "learning_rate": 1.8187564083305913e-07, "loss": 0.4658, "step": 5785, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47368421052631576, "success_rate.epoch.env.math": 0.9123711340206185, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7834912043301759, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5147922109453419, "success_rate.epoch.global": 0.7226450405489707, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9928085774058577, "tokens_p.mean_in_band": 0.7260298295454546, "tokens_rate.above_band": 0.9157088122605364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0842911877394636 }, { "epoch": 0.9624335106382979, "grad_norm": 35.34862413943854, "learning_rate": 1.8184911261491501e-07, "loss": 0.2809, "step": 5790, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47368421052631576, "success_rate.epoch.env.math": 0.9125964010282777, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7836166924265843, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5148240977729844, "success_rate.epoch.global": 0.7228525121555915, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941620879120879, "tokens_p.mean_in_band": 0.70751953125, "tokens_rate.above_band": 0.9191919191919192, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08080808080808081 }, { "epoch": 0.9632646276595744, "grad_norm": 86.21827267420791, "learning_rate": 1.8182258111504903e-07, "loss": 0.3023, "step": 5795, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4742857142857143, "success_rate.epoch.env.math": 0.9126712328767124, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7838255163095927, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5149045668176064, "success_rate.epoch.global": 0.7230941704035875, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9926282051282052, "tokens_p.mean_in_band": 0.6912286931818182, "tokens_rate.above_band": 0.8986175115207373, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10138248847926268 }, { "epoch": 0.964095744680851, "grad_norm": 82.54379671555498, "learning_rate": 1.8179604638054806e-07, "loss": 0.4659, "step": 5800, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4737442922374429, "success_rate.epoch.env.math": 0.9126712328767124, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7839089330503569, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5148629299714694, "success_rate.epoch.global": 0.7230730917693936, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9996764053254438, "tokens_p.mean_in_band": 0.5701593137254902, "tokens_rate.above_band": 0.9298486932599724, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07015130674002751 }, { "epoch": 0.9649268617021277, "grad_norm": 377.4709246861495, "learning_rate": 1.8176950845850482e-07, "loss": 0.3708, "step": 5805, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47434435575826683, "success_rate.epoch.env.math": 0.9127459366980325, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7839922854387656, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5149318499469742, "success_rate.epoch.global": 0.7232109520846297, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9914089347079038, "tokens_p.mean_in_band": 0.689203789893617, "tokens_rate.above_band": 0.8919540229885058, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10804597701149425 }, { "epoch": 0.9657579787234043, "grad_norm": 74.345270079809, "learning_rate": 1.817429673960175e-07, "loss": 0.3647, "step": 5810, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47554038680318544, "success_rate.epoch.env.math": 0.9128205128205128, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7840755735492577, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5150549313358733, "success_rate.epoch.global": 0.7233830845771144, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9932036247334755, "tokens_p.mean_in_band": 0.8349826388888889, "tokens_rate.above_band": 0.9542217700915565, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04577822990844354 }, { "epoch": 0.9665890957446809, "grad_norm": 130.62739845741018, "learning_rate": 1.8171642324019013e-07, "loss": 0.4013, "step": 5815, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47613636363636364, "success_rate.epoch.env.math": 0.9128949615713066, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7840077071290944, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5151097094416741, "success_rate.epoch.global": 0.723430702299565, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964710252600297, "tokens_p.mean_in_band": 0.599609375, "tokens_rate.above_band": 0.9492242595204513, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05077574047954866 }, { "epoch": 0.9674202127659575, "grad_norm": 132.90832053608324, "learning_rate": 1.8168987603813198e-07, "loss": 0.3129, "step": 5820, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47613636363636364, "success_rate.epoch.env.math": 0.9129692832764505, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.784298633827208, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5151429138419702, "success_rate.epoch.global": 0.7237054513845772, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9918893129770993, "tokens_p.mean_in_band": 0.57861328125, "tokens_rate.above_band": 0.891156462585034, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10884353741496598 }, { "epoch": 0.968251329787234, "grad_norm": 169.92809722242012, "learning_rate": 1.8166332583695788e-07, "loss": 0.4216, "step": 5825, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47678369195922987, "success_rate.epoch.env.math": 0.9130434782608695, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.784340130819546, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5152122793237542, "success_rate.epoch.global": 0.7237870703561236, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6944444444444443, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9987476518472135, "tokens_p.mean_in_band": 0.6583136792452831, "tokens_rate.above_band": 0.9890586292320397, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010941370767960363 }, { "epoch": 0.9690824468085106, "grad_norm": 317.9623634523537, "learning_rate": 1.816367726837879e-07, "loss": 0.3503, "step": 5830, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4768361581920904, "success_rate.epoch.env.math": 0.9130434782608695, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7841214917339485, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5151971727007781, "success_rate.epoch.global": 0.723620582765034, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.5, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9951388888888889, "tokens_p.mean_below_band": 8.458683753431728e-11, "tokens_p.mean_in_band": 0.5410610465116279, "tokens_rate.above_band": 0.9375, "tokens_rate.below_band": 0.002777777777777778, "tokens_rate.in_band": 0.059722222222222225 }, { "epoch": 0.9699135638297872, "grad_norm": 146.56793898190534, "learning_rate": 1.816102166257474e-07, "loss": 0.5242, "step": 5835, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4762979683972912, "success_rate.epoch.env.math": 0.9131175468483816, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7841629828944839, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5151587517874371, "success_rate.epoch.global": 0.7235994050570154, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9996357808857809, "tokens_p.mean_in_band": 0.6042374320652174, "tokens_rate.above_band": 0.9491150442477876, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05088495575221239 }, { "epoch": 0.9707446808510638, "grad_norm": 48.895591592111934, "learning_rate": 1.8158365770996686e-07, "loss": 0.4177, "step": 5840, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47576099210823, "success_rate.epoch.env.math": 0.9131175468483816, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7842873607376104, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5151212428378066, "success_rate.epoch.global": 0.7236124876114965, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965538259564891, "tokens_p.mean_in_band": 0.5791713169642857, "tokens_rate.above_band": 0.9596832253419726, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040316774658027354 }, { "epoch": 0.9715757978723404, "grad_norm": 98.08989534898663, "learning_rate": 1.8155709598358187e-07, "loss": 0.41, "step": 5845, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47576099210823, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7841367390051853, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5151209826143195, "success_rate.epoch.global": 0.7235913312693498, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994098712446352, "tokens_p.mean_in_band": 0.6375177556818182, "tokens_rate.above_band": 0.9137254901960784, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08627450980392157 }, { "epoch": 0.972406914893617, "grad_norm": 65.55318611287204, "learning_rate": 1.81530531493733e-07, "loss": 0.3725, "step": 5850, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47635135135135137, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7843438219493477, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5151934773586181, "success_rate.epoch.global": 0.723796559831704, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938063063063063, "tokens_p.mean_in_band": 0.569921875, "tokens_rate.above_band": 0.943342776203966, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056657223796033995 }, { "epoch": 0.9732380319148937, "grad_norm": 71.79185250514684, "learning_rate": 1.8150396428756568e-07, "loss": 0.3341, "step": 5855, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4769403824521935, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7845092024539877, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5152620602318437, "success_rate.epoch.global": 0.7239673509769973, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964064774381368, "tokens_p.mean_in_band": 0.76171875, "tokens_rate.above_band": 0.9870689655172413, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01293103448275862 }, { "epoch": 0.9740691489361702, "grad_norm": 61.65205173501351, "learning_rate": 1.8147739441223021e-07, "loss": 0.5612, "step": 5860, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4769403824521935, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7846330714696302, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5152733210514476, "success_rate.epoch.global": 0.7240697243169737, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9909206081081081, "tokens_p.mean_in_band": 0.7307942708333334, "tokens_rate.above_band": 0.925, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.075 }, { "epoch": 0.9749002659574468, "grad_norm": 118.51365042509147, "learning_rate": 1.8145082191488162e-07, "loss": 0.2667, "step": 5865, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4702702702702703, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4769403824521935, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10510510510510511, "success_rate.epoch.env.science": 0.7847155717295538, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.515280821075077, "success_rate.epoch.global": 0.7241379310344828, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979727056962026, "tokens_p.mean_in_band": 0.51591796875, "tokens_rate.above_band": 0.9693251533742331, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03067484662576687 }, { "epoch": 0.9757313829787234, "grad_norm": 81.44347307064916, "learning_rate": 1.8142424684267953e-07, "loss": 0.3328, "step": 5870, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4769403824521935, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10479041916167664, "success_rate.epoch.env.science": 0.7848392036753445, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5155223624661666, "success_rate.epoch.global": 0.7241847826086957, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960841523341524, "tokens_p.mean_in_band": 0.7125, "tokens_rate.above_band": 0.9531615925058547, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0468384074941452 }, { "epoch": 0.9765625, "grad_norm": 149.1031706418972, "learning_rate": 1.8139766924278824e-07, "loss": 0.3841, "step": 5875, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4769403824521935, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10479041916167664, "success_rate.epoch.env.science": 0.7850860420650095, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5155448023197725, "success_rate.epoch.global": 0.7243890397432733, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.4375, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.9942306307603687, "tokens_p.mean_in_band": 0.5265157418952618, "tokens_rate.above_band": 0.8964626904208624, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10353730957913762 }, { "epoch": 0.9773936170212766, "grad_norm": 61.66881901678859, "learning_rate": 1.8137108916237645e-07, "loss": 0.5179, "step": 5880, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4769403824521935, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10479041916167664, "success_rate.epoch.env.science": 0.7852502865876958, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5155597336400166, "success_rate.epoch.global": 0.7245250431778929, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9863696808510638, "tokens_p.mean_in_band": 0.7795138888888888, "tokens_rate.above_band": 0.912621359223301, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08737864077669903 }, { "epoch": 0.9782247340425532, "grad_norm": 72.2111997471744, "learning_rate": 1.813445066486173e-07, "loss": 0.4135, "step": 5885, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47752808988764045, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10479041916167664, "success_rate.epoch.env.science": 0.7852913085004776, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5156168908534919, "success_rate.epoch.global": 0.724592994573261, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988295880149812, "tokens_p.mean_in_band": 0.6165865384615384, "tokens_rate.above_band": 0.9535714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04642857142857143 }, { "epoch": 0.9790558510638298, "grad_norm": 66.40379563122467, "learning_rate": 1.8131792174868823e-07, "loss": 0.3098, "step": 5890, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4769921436588103, "success_rate.epoch.env.math": 0.9132653061224489, "success_rate.epoch.env.sat": 0.10479041916167664, "success_rate.epoch.env.science": 0.7854961832061069, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5155867934422919, "success_rate.epoch.global": 0.7246734039930983, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982212160413971, "tokens_p.mean_in_band": 0.7091346153846154, "tokens_rate.above_band": 0.967459324155194, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03254067584480601 }, { "epoch": 0.9798869680851063, "grad_norm": 121.02380918566524, "learning_rate": 1.8129133450977095e-07, "loss": 0.3252, "step": 5895, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4769921436588103, "success_rate.epoch.env.math": 0.9134860050890585, "success_rate.epoch.env.sat": 0.10479041916167664, "success_rate.epoch.env.science": 0.7853872567722243, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5155969545816307, "success_rate.epoch.global": 0.7247197930779653, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9862753378378378, "tokens_p.mean_in_band": 0.7822265625, "tokens_rate.above_band": 0.9487179487179487, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05128205128205128 }, { "epoch": 0.980718085106383, "grad_norm": 124.96733330899657, "learning_rate": 1.8126474497905126e-07, "loss": 0.3787, "step": 5900, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47757847533632286, "success_rate.epoch.env.math": 0.9135593220338983, "success_rate.epoch.env.sat": 0.10479041916167664, "success_rate.epoch.env.science": 0.7855100095328885, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5156680819800867, "success_rate.epoch.global": 0.7248892171344166, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9916424418604651, "tokens_p.mean_in_band": 0.8375, "tokens_rate.above_band": 0.9717514124293786, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02824858757062147 }, { "epoch": 0.9815492021276596, "grad_norm": 177.3154196555142, "learning_rate": 1.812381532037191e-07, "loss": 0.5138, "step": 5905, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47757847533632286, "success_rate.epoch.env.math": 0.9135593220338983, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.7855917667238422, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5159184478524905, "success_rate.epoch.global": 0.7249907715023994, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971289752650176, "tokens_p.mean_in_band": 0.7462827620967742, "tokens_rate.above_band": 0.9647727272727272, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035227272727272725 }, { "epoch": 0.9823803191489362, "grad_norm": 58.090451073990245, "learning_rate": 1.8121155923096835e-07, "loss": 0.4662, "step": 5910, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47757847533632286, "success_rate.epoch.env.math": 0.9135593220338983, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.7857958872810358, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5159370042667809, "success_rate.epoch.global": 0.7251598622725037, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977570156502968, "tokens_p.mean_in_band": 0.7930871212121212, "tokens_rate.above_band": 0.9825026511134677, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017497348886532343 }, { "epoch": 0.9832114361702128, "grad_norm": 131.82925285618356, "learning_rate": 1.8118496310799674e-07, "loss": 0.3389, "step": 5915, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47704367301231804, "success_rate.epoch.env.math": 0.9137055837563451, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.7859181731684111, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5159127992927642, "success_rate.epoch.global": 0.7252396166134185, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986370716510904, "tokens_p.mean_in_band": 0.6609375, "tokens_rate.above_band": 0.9771689497716894, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0228310502283105 }, { "epoch": 0.9840425531914894, "grad_norm": 107.69429716799668, "learning_rate": 1.8115836488200594e-07, "loss": 0.3317, "step": 5920, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47704367301231804, "success_rate.epoch.env.math": 0.9137055837563451, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.785958904109589, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5159165021055986, "success_rate.epoch.global": 0.7252733751075071, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994140625, "tokens_p.mean_in_band": 0.7204241071428571, "tokens_rate.above_band": 0.973384030418251, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026615969581749048 }, { "epoch": 0.984873670212766, "grad_norm": 45.90095468635324, "learning_rate": 1.8113176460020125e-07, "loss": 0.3998, "step": 5925, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47704367301231804, "success_rate.epoch.env.math": 0.9139966273187183, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.7860810039931546, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5159540606006839, "success_rate.epoch.global": 0.7255094524920206, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9911684782608695, "tokens_p.mean_in_band": 0.83125, "tokens_rate.above_band": 0.965034965034965, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03496503496503497 }, { "epoch": 0.9857047872340425, "grad_norm": 90.68505092317156, "learning_rate": 1.8110516230979167e-07, "loss": 0.2451, "step": 5930, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47762863534675615, "success_rate.epoch.env.math": 0.9139966273187183, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.7862841945288754, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5160257108616075, "success_rate.epoch.global": 0.725711481844946, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994381051175657, "tokens_p.mean_in_band": 0.5784505208333334, "tokens_rate.above_band": 0.9678714859437751, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0321285140562249 }, { "epoch": 0.9865359042553191, "grad_norm": 68.6277486779169, "learning_rate": 1.8107855805798974e-07, "loss": 0.3264, "step": 5935, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4782122905027933, "success_rate.epoch.env.math": 0.9140690817186183, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.7864059236757167, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.516096423470951, "success_rate.epoch.global": 0.7258796126026725, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967196132596685, "tokens_p.mean_in_band": 0.640869140625, "tokens_rate.above_band": 0.9826275787187839, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01737242128121607 }, { "epoch": 0.9873670212765957, "grad_norm": 120.3988100624948, "learning_rate": 1.810519518920115e-07, "loss": 0.3988, "step": 5940, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4782122905027933, "success_rate.epoch.env.math": 0.9141414141414141, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.786486999430632, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5161103696689247, "success_rate.epoch.global": 0.7259803921568627, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9910954301075269, "tokens_p.mean_in_band": 0.7744140625, "tokens_rate.above_band": 0.9789473684210527, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021052631578947368 }, { "epoch": 0.9881981382978723, "grad_norm": 81.62728703919721, "learning_rate": 1.8102534385907637e-07, "loss": 0.4603, "step": 5945, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4731182795698925, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4782122905027933, "success_rate.epoch.env.math": 0.9142136248948697, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.786608497723824, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5161279795822563, "success_rate.epoch.global": 0.7261146496815286, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9939365671641791, "tokens_p.mean_in_band": 0.6722470238095238, "tokens_rate.above_band": 0.9054054054054054, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0945945945945946 }, { "epoch": 0.989029255319149, "grad_norm": 128.11085635184202, "learning_rate": 1.809987340064071e-07, "loss": 0.4587, "step": 5950, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47593582887700536, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4782122905027933, "success_rate.epoch.env.math": 0.9144295302013423, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.7867298578199052, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5164147809194988, "success_rate.epoch.global": 0.7263492840533594, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964102176541717, "tokens_p.mean_in_band": 0.609375, "tokens_rate.above_band": 0.9605110336817654, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03948896631823461 }, { "epoch": 0.9898603723404256, "grad_norm": 74.15362753893264, "learning_rate": 1.809721223812296e-07, "loss": 0.2818, "step": 5955, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47593582887700536, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4782122905027933, "success_rate.epoch.env.math": 0.9144295302013423, "success_rate.epoch.env.sat": 0.10746268656716418, "success_rate.epoch.env.science": 0.7868106878908471, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5164221291077662, "success_rate.epoch.global": 0.7264162486235165, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.992870145631068, "tokens_p.mean_in_band": 0.7682291666666666, "tokens_rate.above_band": 0.9856459330143541, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014354066985645933 }, { "epoch": 0.9906914893617021, "grad_norm": 137.5463715068484, "learning_rate": 1.8094550903077308e-07, "loss": 0.3272, "step": 5960, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4787234042553192, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.4782122905027933, "success_rate.epoch.env.math": 0.9144295302013423, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.786891456715287, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5166538122694431, "success_rate.epoch.global": 0.7264277852513147, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986111111111111, "tokens_p.mean_in_band": 0.6121199324324325, "tokens_rate.above_band": 0.96051227321238, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03948772678762007 }, { "epoch": 0.9915226063829787, "grad_norm": 112.61926576065476, "learning_rate": 1.8091889400226964e-07, "loss": 0.4744, "step": 5965, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4787234042553192, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47767857142857145, "success_rate.epoch.env.math": 0.914501257334451, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.78697216436281, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5166191500609348, "success_rate.epoch.global": 0.7264393105977265, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989551671732523, "tokens_p.mean_in_band": 0.6417410714285714, "tokens_rate.above_band": 0.9591836734693877, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04081632653061224 }, { "epoch": 0.9923537234042553, "grad_norm": 52.42686366849415, "learning_rate": 1.8089227734295448e-07, "loss": 0.419, "step": 5970, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47619047619047616, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47714604236343366, "success_rate.epoch.env.math": 0.9146443514644351, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.787052810902896, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5163319093530491, "success_rate.epoch.global": 0.7263067904250122, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9974769319492502, "tokens_p.mean_in_band": 0.6024169921875, "tokens_rate.above_band": 0.96440489432703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035595105672969966 }, { "epoch": 0.9931848404255319, "grad_norm": 116.20765040075962, "learning_rate": 1.8086565910006565e-07, "loss": 0.4298, "step": 5975, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47619047619047616, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5126903553299492, "success_rate.epoch.env.logic": 0.47714604236343366, "success_rate.epoch.env.math": 0.9147157190635451, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.7872139209381501, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5163530436834459, "success_rate.epoch.global": 0.7264738191138777, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9762414383561644, "tokens_p.mean_in_band": 0.833984375, "tokens_rate.above_band": 0.9733333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02666666666666667 }, { "epoch": 0.9940159574468085, "grad_norm": 51.800890905158255, "learning_rate": 1.8083903932084396e-07, "loss": 0.3847, "step": 5980, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47619047619047616, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.47714604236343366, "success_rate.epoch.env.math": 0.9147157190635451, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.7873747873747874, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.516591409706919, "success_rate.epoch.global": 0.7266406440595267, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980273046092184, "tokens_p.mean_in_band": 0.6154119318181818, "tokens_rate.above_band": 0.967992240543162, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03200775945683802 }, { "epoch": 0.9948470744680851, "grad_norm": 60.096989962830364, "learning_rate": 1.8081241805253304e-07, "loss": 0.3745, "step": 5985, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47619047619047616, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.477728285077951, "success_rate.epoch.env.math": 0.9147869674185464, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.7874551294162101, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5166581218079136, "success_rate.epoch.global": 0.726773957571324, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.8666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9906572164948454, "tokens_p.mean_below_band": 5.400124791776761e-13, "tokens_p.mean_in_band": 0.6814453125, "tokens_rate.above_band": 0.9023255813953488, "tokens_rate.below_band": 0.004651162790697674, "tokens_rate.in_band": 0.09302325581395349 }, { "epoch": 0.9956781914893617, "grad_norm": 66.08985378757245, "learning_rate": 1.8078579534237912e-07, "loss": 0.3686, "step": 5990, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.47619047619047616, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.47888888888888886, "success_rate.epoch.env.math": 0.9147869674185464, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.7875354107648725, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5167709295496954, "success_rate.epoch.global": 0.7269071411162564, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998439450686641, "tokens_p.mean_in_band": 0.6608072916666666, "tokens_rate.above_band": 0.9709090909090909, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02909090909090909 }, { "epoch": 0.9965093085106383, "grad_norm": 41.40777159785019, "learning_rate": 1.8075917123763099e-07, "loss": 0.3019, "step": 5995, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4789473684210526, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.47888888888888886, "success_rate.epoch.env.math": 0.9147869674185464, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.7876957916588035, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5170361361973779, "success_rate.epoch.global": 0.7270734380708805, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9951978211009175, "tokens_p.mean_in_band": 0.82470703125, "tokens_rate.above_band": 0.9819819819819819, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018018018018018018 }, { "epoch": 0.9973404255319149, "grad_norm": 131.32301301341676, "learning_rate": 1.8073254578553986e-07, "loss": 0.3805, "step": 6000, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4789473684210526, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.47888888888888886, "success_rate.epoch.env.math": 0.9148580968280468, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.7877758913412564, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5170498842966463, "success_rate.epoch.global": 0.7271731190650109, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977859311740891, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.9801587301587301, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01984126984126984 }, { "epoch": 0.9981715425531915, "grad_norm": 86.19276176012748, "learning_rate": 1.8070591903335947e-07, "loss": 0.2964, "step": 6005, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4789473684210526, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.47888888888888886, "success_rate.epoch.env.math": 0.9150707743547044, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.7880158281514981, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5170910310545463, "success_rate.epoch.global": 0.7274717256475739, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9890625, "tokens_p.mean_in_band": 0.7578125, "tokens_rate.above_band": 0.9302325581395349, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06976744186046512 }, { "epoch": 0.9990026595744681, "grad_norm": 145.82618237619, "learning_rate": 1.8067929102834572e-07, "loss": 0.4313, "step": 6010, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4789473684210526, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.47888888888888886, "success_rate.epoch.env.math": 0.9152119700748129, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.7882153614457831, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5171220064194911, "success_rate.epoch.global": 0.7277035236938032, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995042768273717, "tokens_p.mean_in_band": 0.6756036931818182, "tokens_rate.above_band": 0.9669172932330827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03308270676691729 }, { "epoch": 0.9998337765957447, "grad_norm": 112.2731779726927, "learning_rate": 1.8065266181775683e-07, "loss": 0.4546, "step": 6015, "success_rate.epoch.env.abd": 0.46938775510204084, "success_rate.epoch.env.agentgym:alfworld": 0.4789473684210526, "success_rate.epoch.env.agentgym:sciworld": 0.2857142857142857, "success_rate.epoch.env.agentgym:textcraft": 0.4, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.47888888888888886, "success_rate.epoch.env.math": 0.9152119700748129, "success_rate.epoch.env.sat": 0.10682492581602374, "success_rate.epoch.env.science": 0.7883747178329571, "success_rate.epoch.env.webshop": 0.25, "success_rate.epoch.env_macro_mean": 0.5171364933637798, "success_rate.epoch.global": 0.7278358027690066, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9983678343949045, "tokens_p.mean_in_band": 0.6527157738095238, "tokens_rate.above_band": 0.9739454094292804, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026054590570719603 }, { "epoch": 1.0006648936170213, "grad_norm": 81.4455954098103, "learning_rate": 1.8062603144885308e-07, "loss": 0.4109, "step": 6020, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953100315955766, "tokens_p.mean_in_band": 0.7556818181818182, "tokens_rate.above_band": 0.9829192546583851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017080745341614908 }, { "epoch": 1.001496010638298, "grad_norm": 86.29712614751085, "learning_rate": 1.8059939996889688e-07, "loss": 0.3937, "step": 6025, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.science": 1.0, "success_rate.epoch.env_macro_mean": 0.6666666666666666, "success_rate.epoch.global": 0.8, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9960842148087876, "tokens_p.mean_in_band": 0.5748355263157895, "tokens_rate.above_band": 0.9700078926598263, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02999210734017364 }, { "epoch": 1.0023271276595744, "grad_norm": 86.52106065074864, "learning_rate": 1.8057276742515258e-07, "loss": 0.4293, "step": 6030, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.science": 0.8125, "success_rate.epoch.env_macro_mean": 0.65625, "success_rate.epoch.global": 0.75, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9908088235294118, "tokens_p.mean_below_band": 1.9326762412674725e-10, "tokens_p.mean_in_band": 0.740625, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.009259259259259259, "tokens_rate.in_band": 0.046296296296296294 }, { "epoch": 1.0031582446808511, "grad_norm": 50.01992623125245, "learning_rate": 1.8054613386488646e-07, "loss": 0.3652, "step": 6035, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.science": 0.875, "success_rate.epoch.env_macro_mean": 0.7916666666666666, "success_rate.epoch.global": 0.8333333333333334, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9872047244094488, "tokens_p.mean_in_band": 0.7990451388888888, "tokens_rate.above_band": 0.9338235294117647, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0661764705882353 }, { "epoch": 1.0039893617021276, "grad_norm": 82.87358076532286, "learning_rate": 1.805194993353665e-07, "loss": 0.5079, "step": 6040, "success_rate.epoch.env.ded": 1.0, "success_rate.epoch.env.logic": 0.4, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.0, "success_rate.epoch.env.science": 0.9, "success_rate.epoch.env_macro_mean": 0.6599999999999999, "success_rate.epoch.global": 0.825, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995078125, "tokens_p.mean_in_band": 0.6329571759259259, "tokens_rate.above_band": 0.936768149882904, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06323185011709602 }, { "epoch": 1.0048204787234043, "grad_norm": 57.394727477190806, "learning_rate": 1.804928638838625e-07, "loss": 0.3625, "step": 6045, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.4, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.0, "success_rate.epoch.env.science": 0.9166666666666666, "success_rate.epoch.env_macro_mean": 0.5966666666666666, "success_rate.epoch.global": 0.82, "success_rate.window.env.ded": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9909754224270353, "tokens_p.mean_in_band": 0.6451198630136986, "tokens_rate.above_band": 0.8168130489335006, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18318695106649938 }, { "epoch": 1.0056515957446808, "grad_norm": 168.9779189069824, "learning_rate": 1.8046622755764583e-07, "loss": 0.4283, "step": 6050, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.3333333333333333, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.0, "success_rate.epoch.env.science": 0.9302325581395349, "success_rate.epoch.env_macro_mean": 0.6027131782945736, "success_rate.epoch.global": 0.8333333333333334, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992829106280193, "tokens_p.mean_in_band": 0.4835069444444444, "tokens_rate.above_band": 0.9787234042553191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02127659574468085 }, { "epoch": 1.0064827127659575, "grad_norm": 62.9512422906743, "learning_rate": 1.804395904039895e-07, "loss": 0.3133, "step": 6055, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.42857142857142855, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.9375, "success_rate.epoch.env_macro_mean": 0.7784863945578232, "success_rate.epoch.global": 0.8571428571428571, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9963942307692307, "tokens_p.mean_in_band": 0.7440011160714286, "tokens_rate.above_band": 0.9780564263322884, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0219435736677116 }, { "epoch": 1.007313829787234, "grad_norm": 107.14793771797173, "learning_rate": 1.8041295247016785e-07, "loss": 0.3398, "step": 6060, "success_rate.epoch.env.agentgym:alfworld": 1.0, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.375, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.9464285714285714, "success_rate.epoch.env_macro_mean": 0.772108843537415, "success_rate.epoch.global": 0.8625, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963235294117647, "tokens_p.mean_in_band": 0.7119891826923077, "tokens_rate.above_band": 0.9622641509433962, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03773584905660377 }, { "epoch": 1.0081449468085106, "grad_norm": 69.63853529149704, "learning_rate": 1.803863138034568e-07, "loss": 0.492, "step": 6065, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.375, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.9516129032258065, "success_rate.epoch.env_macro_mean": 0.7252304147465438, "success_rate.epoch.global": 0.8666666666666667, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994745575221239, "tokens_p.mean_in_band": 0.8005642361111112, "tokens_rate.above_band": 0.9617021276595744, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03829787234042553 }, { "epoch": 1.0089760638297873, "grad_norm": 36.047487996641635, "learning_rate": 1.8035967445113334e-07, "loss": 0.2962, "step": 6070, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.375, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.3333333333333333, "success_rate.epoch.env.science": 0.9420289855072463, "success_rate.epoch.env_macro_mean": 0.7238612836438924, "success_rate.epoch.global": 0.8686868686868687, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9792613636363636, "tokens_p.mean_in_band": 0.717092803030303, "tokens_rate.above_band": 0.7692307692307693, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.23076923076923078 }, { "epoch": 1.0098071808510638, "grad_norm": 26.297947848747945, "learning_rate": 1.8033303446047585e-07, "loss": 0.4284, "step": 6075, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.75, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.25, "success_rate.epoch.env.science": 0.9466666666666667, "success_rate.epoch.env_macro_mean": 0.7304761904761905, "success_rate.epoch.global": 0.8715596330275229, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9928977272727273, "tokens_p.mean_in_band": 0.7604166666666666, "tokens_rate.above_band": 0.9407894736842105, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05921052631578947 }, { "epoch": 1.0106382978723405, "grad_norm": 54.82936353437266, "learning_rate": 1.803063938787638e-07, "loss": 0.351, "step": 6080, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.8, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.9382716049382716, "success_rate.epoch.env_macro_mean": 0.7292768959435626, "success_rate.epoch.global": 0.865546218487395, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99235807860262, "tokens_p.mean_in_band": 0.7141335227272727, "tokens_rate.above_band": 0.9123505976095617, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08764940239043825 }, { "epoch": 1.011469414893617, "grad_norm": 78.10722897562525, "learning_rate": 1.8027975275327767e-07, "loss": 0.3234, "step": 6085, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.8, "success_rate.epoch.env.logic": 0.45454545454545453, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.2, "success_rate.epoch.env.science": 0.9438202247191011, "success_rate.epoch.env_macro_mean": 0.7235760494187461, "success_rate.epoch.global": 0.8682170542635659, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972826086956522, "tokens_p.mean_in_band": 0.6544596354166666, "tokens_rate.above_band": 0.9640718562874252, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03592814371257485 }, { "epoch": 1.0123005319148937, "grad_norm": 134.99715335078903, "learning_rate": 1.8025311113129895e-07, "loss": 0.5774, "step": 6090, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.8, "success_rate.epoch.env.logic": 0.42857142857142855, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.16666666666666666, "success_rate.epoch.env.science": 0.9473684210526315, "success_rate.epoch.env_macro_mean": 0.7156104547081991, "success_rate.epoch.global": 0.8561151079136691, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9997014331210191, "tokens_p.mean_in_band": 0.5558792372881356, "tokens_rate.above_band": 0.9551330798479087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044866920152091254 }, { "epoch": 1.0131316489361701, "grad_norm": 78.76696622476783, "learning_rate": 1.8022646906010995e-07, "loss": 0.3835, "step": 6095, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.42857142857142855, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.9306930693069307, "success_rate.epoch.env_macro_mean": 0.6907792820098335, "success_rate.epoch.global": 0.8389261744966443, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9854878917378918, "tokens_p.mean_in_band": 0.7460039511494253, "tokens_rate.above_band": 0.8897338403041825, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11026615969581749 }, { "epoch": 1.0139627659574468, "grad_norm": 74.47510169912339, "learning_rate": 1.8019982658699388e-07, "loss": 0.352, "step": 6100, "success_rate.epoch.env.agentgym:alfworld": 0.75, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.4375, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.9333333333333333, "success_rate.epoch.env_macro_mean": 0.7043367346938776, "success_rate.epoch.global": 0.8417721518987342, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994179278230501, "tokens_p.mean_in_band": 0.7449776785714286, "tokens_rate.above_band": 0.983963344788087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016036655211912942 }, { "epoch": 1.0147938829787233, "grad_norm": 75.40758578290753, "learning_rate": 1.8017318375923458e-07, "loss": 0.4668, "step": 6105, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.8, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.47058823529411764, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.9272727272727272, "success_rate.epoch.env_macro_mean": 0.6884230965113318, "success_rate.epoch.global": 0.8392857142857143, "success_rate.window.env.abd": 0.5, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.86, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988132911392406, "tokens_p.mean_below_band": 7.188646122813225e-09, "tokens_p.mean_in_band": 0.20199046415441177, "tokens_rate.above_band": 0.88268156424581, "tokens_rate.below_band": 0.0004297378599054577, "tokens_rate.in_band": 0.11688869789428448 }, { "epoch": 1.015625, "grad_norm": 470.57805880278977, "learning_rate": 1.801465406241165e-07, "loss": 0.2557, "step": 6110, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.8, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6666666666666666, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.9316239316239316, "success_rate.epoch.env_macro_mean": 0.6926434676434676, "success_rate.epoch.global": 0.848314606741573, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.992602657004831, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9904306220095693, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009569377990430622 }, { "epoch": 1.0164561170212767, "grad_norm": 106.38473771486551, "learning_rate": 1.8011989722892476e-07, "loss": 0.3066, "step": 6115, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.8, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.47368421052631576, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.936, "success_rate.epoch.env_macro_mean": 0.6779962406015038, "success_rate.epoch.global": 0.8457446808510638, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9931662515566625, "tokens_p.mean_in_band": 0.5221354166666666, "tokens_rate.above_band": 0.9083710407239819, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0916289592760181 }, { "epoch": 1.0172872340425532, "grad_norm": 313.3548378664921, "learning_rate": 1.8009325362094475e-07, "loss": 0.3481, "step": 6120, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.8, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.9236641221374046, "success_rate.epoch.env_macro_mean": 0.6708151581243185, "success_rate.epoch.global": 0.8383838383838383, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9889705882352942, "tokens_p.mean_in_band": 0.6956313775510204, "tokens_rate.above_band": 0.8525576730190572, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14744232698094284 }, { "epoch": 1.0181183510638299, "grad_norm": 73.79436235774392, "learning_rate": 1.8006660984746248e-07, "loss": 0.3822, "step": 6125, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.8, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5555555555555556, "success_rate.epoch.env.logic": 0.47619047619047616, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.927536231884058, "success_rate.epoch.env_macro_mean": 0.675267425810904, "success_rate.epoch.global": 0.8413461538461539, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998553240740741, "tokens_p.mean_in_band": 0.5331307870370371, "tokens_rate.above_band": 0.9696969696969697, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030303030303030304 }, { "epoch": 1.0189494680851063, "grad_norm": 89.67487704445716, "learning_rate": 1.80039965955764e-07, "loss": 0.2539, "step": 6130, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.8, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.9310344827586207, "success_rate.epoch.env_macro_mean": 0.6695043103448276, "success_rate.epoch.global": 0.8394495412844036, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9932097615499255, "tokens_p.mean_in_band": 0.5382197342519685, "tokens_rate.above_band": 0.8408521303258145, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15914786967418545 }, { "epoch": 1.019780585106383, "grad_norm": 54.95808526334416, "learning_rate": 1.8001332199313574e-07, "loss": 0.3467, "step": 6135, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.8, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.9290322580645162, "success_rate.epoch.env_macro_mean": 0.6692540322580646, "success_rate.epoch.global": 0.8421052631578947, "success_rate.window.env.science": 0.9, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9890086206896552, "tokens_p.mean_below_band": 4.172325134277344e-07, "tokens_p.mean_in_band": 0.6712239583333334, "tokens_rate.above_band": 0.9731543624161074, "tokens_rate.below_band": 0.006711409395973154, "tokens_rate.in_band": 0.020134228187919462 }, { "epoch": 1.0206117021276595, "grad_norm": 64.5118722245273, "learning_rate": 1.7998667800686425e-07, "loss": 0.347, "step": 6140, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.8, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9259259259259259, "success_rate.epoch.env_macro_mean": 0.6657407407407407, "success_rate.epoch.global": 0.8361344537815126, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9926321138211383, "tokens_p.mean_in_band": 0.57734375, "tokens_rate.above_band": 0.8601398601398601, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13986013986013987 }, { "epoch": 1.0214428191489362, "grad_norm": 161.87507300151358, "learning_rate": 1.7996003404423597e-07, "loss": 0.4571, "step": 6145, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.6666666666666666, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9285714285714286, "success_rate.epoch.env_macro_mean": 0.6494047619047619, "success_rate.epoch.global": 0.8340080971659919, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9993191721132898, "tokens_p.mean_in_band": 0.6783854166666666, "tokens_rate.above_band": 0.9870967741935484, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012903225806451613 }, { "epoch": 1.0222739361702127, "grad_norm": 89.62905185994916, "learning_rate": 1.7993339015253752e-07, "loss": 0.414, "step": 6150, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.9310344827586207, "success_rate.epoch.env_macro_mean": 0.6433679467084639, "success_rate.epoch.global": 0.83203125, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973233404710921, "tokens_p.mean_in_band": 0.6585286458333334, "tokens_rate.above_band": 0.9511201629327902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.048879837067209775 }, { "epoch": 1.0231050531914894, "grad_norm": 85.80830560137224, "learning_rate": 1.7990674637905524e-07, "loss": 0.304, "step": 6155, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5185185185185185, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.9337016574585635, "success_rate.epoch.env_macro_mean": 0.6460161583607715, "success_rate.epoch.global": 0.8345864661654135, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940476190476191, "tokens_p.mean_in_band": 0.573046875, "tokens_rate.above_band": 0.9742268041237113, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02577319587628866 }, { "epoch": 1.023936170212766, "grad_norm": 57.54340260769104, "learning_rate": 1.7988010277107526e-07, "loss": 0.3435, "step": 6160, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5185185185185185, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.9312169312169312, "success_rate.epoch.env_macro_mean": 0.6457055675805676, "success_rate.epoch.global": 0.8369565217391305, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9819630872483222, "tokens_p.mean_in_band": 0.7358630952380952, "tokens_rate.above_band": 0.8764705882352941, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12352941176470589 }, { "epoch": 1.0247672872340425, "grad_norm": 74.82932852390653, "learning_rate": 1.7985345937588348e-07, "loss": 0.3236, "step": 6165, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.9336734693877551, "success_rate.epoch.env_macro_mean": 0.6436978200371057, "success_rate.epoch.global": 0.8385964912280702, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989341746248295, "tokens_p.mean_in_band": 0.615625, "tokens_rate.above_band": 0.9734395750332006, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02656042496679947 }, { "epoch": 1.0255984042553192, "grad_norm": 265.49353350126614, "learning_rate": 1.7982681624076542e-07, "loss": 0.3662, "step": 6170, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.5172413793103449, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9356435643564357, "success_rate.epoch.env_macro_mean": 0.6508341028068324, "success_rate.epoch.global": 0.8406779661016949, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9863398203592815, "tokens_p.mean_in_band": 0.8110795454545454, "tokens_rate.above_band": 0.9192660550458716, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08073394495412844 }, { "epoch": 1.0264295212765957, "grad_norm": 42.00687446440549, "learning_rate": 1.7980017341300612e-07, "loss": 0.4013, "step": 6175, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.5172413793103449, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9330143540669856, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6893381791294677, "success_rate.epoch.global": 0.8426229508196721, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962657232704403, "tokens_p.mean_in_band": 0.4541015625, "tokens_rate.above_band": 0.9298245614035088, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07017543859649122 }, { "epoch": 1.0272606382978724, "grad_norm": 38.88415792190259, "learning_rate": 1.7977353093989004e-07, "loss": 0.4684, "step": 6180, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.5333333333333333, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9348837209302325, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6728153629316419, "success_rate.epoch.global": 0.8444444444444444, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9901706231454006, "tokens_p.mean_in_band": 0.4854589332460733, "tokens_rate.above_band": 0.6382575757575758, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.36174242424242425 }, { "epoch": 1.0280917553191489, "grad_norm": 167.1843122622322, "learning_rate": 1.7974688886870105e-07, "loss": 0.2353, "step": 6185, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.5161290322580645, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9363636363636364, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.670355958259184, "success_rate.epoch.global": 0.8421052631578947, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9949462166172107, "tokens_p.mean_in_band": 0.5779513888888889, "tokens_rate.above_band": 0.9374130737134909, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06258692628650904 }, { "epoch": 1.0289228723404256, "grad_norm": 74.98156226040037, "learning_rate": 1.7972024724672232e-07, "loss": 0.4866, "step": 6190, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.9385964912280702, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.66820143793828, "success_rate.epoch.global": 0.8408408408408409, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9991444524495677, "tokens_p.mean_in_band": 0.6401909722222222, "tokens_rate.above_band": 0.9506849315068493, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049315068493150684 }, { "epoch": 1.029753989361702, "grad_norm": 60.056797413462895, "learning_rate": 1.796936061212362e-07, "loss": 0.4229, "step": 6195, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.625, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.5, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.9361702127659575, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6679318514424897, "success_rate.epoch.global": 0.8396501457725948, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9943265007320644, "tokens_p.mean_below_band": 2.648448571562767e-09, "tokens_p.mean_in_band": 0.6730143229166666, "tokens_rate.above_band": 0.9646892655367232, "tokens_rate.below_band": 0.0014124293785310734, "tokens_rate.in_band": 0.03389830508474576 }, { "epoch": 1.0305851063829787, "grad_norm": 104.44929057153388, "learning_rate": 1.7966696553952414e-07, "loss": 0.3785, "step": 6200, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.4722222222222222, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.9377593360995851, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6573059515659793, "success_rate.epoch.global": 0.8352272727272727, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9991003787878788, "tokens_p.mean_in_band": 0.5951171875, "tokens_rate.above_band": 0.9777777777777777, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022222222222222223 }, { "epoch": 1.0314162234042552, "grad_norm": 99.00103191760212, "learning_rate": 1.7964032554886666e-07, "loss": 0.4355, "step": 6205, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.4594594594594595, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9397590361445783, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6555809551793488, "success_rate.epoch.global": 0.8342541436464088, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9946516973125884, "tokens_p.mean_in_band": 0.5326360887096774, "tokens_rate.above_band": 0.9579945799457995, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04200542005420054 }, { "epoch": 1.032247340425532, "grad_norm": 93.44815773226698, "learning_rate": 1.796136861965432e-07, "loss": 0.3163, "step": 6210, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.475, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9372549019607843, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6570294447745427, "success_rate.epoch.global": 0.8328840970350404, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7619047619047619, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942839951865222, "tokens_p.mean_below_band": 9.549694368615746e-12, "tokens_p.mean_in_band": 0.6758840460526315, "tokens_rate.above_band": 0.9551724137931035, "tokens_rate.below_band": 0.0011494252873563218, "tokens_rate.in_band": 0.04367816091954023 }, { "epoch": 1.0330784574468086, "grad_norm": 179.41227965376063, "learning_rate": 1.7958704752983215e-07, "loss": 0.301, "step": 6215, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.4878048780487805, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.9391634980988594, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.658201312276786, "success_rate.epoch.global": 0.8346456692913385, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9905711206896551, "tokens_p.mean_in_band": 0.7565104166666666, "tokens_rate.above_band": 0.928, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.072 }, { "epoch": 1.033909574468085, "grad_norm": 75.83439667964703, "learning_rate": 1.795604095960105e-07, "loss": 0.2932, "step": 6220, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.4883720930232558, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.937037037037037, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6580280627115251, "success_rate.epoch.global": 0.8333333333333334, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9874387254901961, "tokens_p.mean_in_band": 0.6391059027777778, "tokens_rate.above_band": 0.9577464788732394, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04225352112676056 }, { "epoch": 1.0347406914893618, "grad_norm": 77.95757712974655, "learning_rate": 1.7953377244235416e-07, "loss": 0.4604, "step": 6225, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.4883720930232558, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9347826086956522, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6573690739415674, "success_rate.epoch.global": 0.8316582914572864, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5666666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9870125145518044, "tokens_p.mean_in_band": 0.7482341609589042, "tokens_rate.above_band": 0.9216738197424893, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07832618025751073 }, { "epoch": 1.0355718085106382, "grad_norm": 59.45374740288453, "learning_rate": 1.795071361161375e-07, "loss": 0.488, "step": 6230, "success_rate.epoch.env.abd": 0.3333333333333333, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.4666666666666667, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9363957597173145, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6551365989043533, "success_rate.epoch.global": 0.8308823529411765, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9928075396825397, "tokens_p.mean_in_band": 0.6340401785714286, "tokens_rate.above_band": 0.9557522123893806, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04424778761061947 }, { "epoch": 1.036402925531915, "grad_norm": 56.436265925759805, "learning_rate": 1.7948050066463348e-07, "loss": 0.3691, "step": 6235, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.5555555555555556, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.4666666666666667, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9342560553633218, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6734173724946504, "success_rate.epoch.global": 0.8321342925659473, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9905405405405405, "tokens_p.mean_below_band": 9.74978320300579e-10, "tokens_p.mean_in_band": 0.873046875, "tokens_rate.above_band": 0.9840425531914894, "tokens_rate.below_band": 0.005319148936170213, "tokens_rate.in_band": 0.010638297872340425 }, { "epoch": 1.0372340425531914, "grad_norm": 66.65812229181546, "learning_rate": 1.7945386613511354e-07, "loss": 0.4033, "step": 6240, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.5454545454545454, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.4782608695652174, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9355932203389831, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6737318566916728, "success_rate.epoch.global": 0.8337236533957846, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950579073482428, "tokens_p.mean_in_band": 0.6744791666666666, "tokens_rate.above_band": 0.990506329113924, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00949367088607595 }, { "epoch": 1.038065159574468, "grad_norm": 76.09320827494683, "learning_rate": 1.794272325748474e-07, "loss": 0.3254, "step": 6245, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.5454545454545454, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.5833333333333334, "success_rate.epoch.env.logic": 0.4791666666666667, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9364548494983278, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6781369915960709, "success_rate.epoch.global": 0.8348623853211009, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0004812628336757, "tokens_p.mean_in_band": 0.73779296875, "tokens_rate.above_band": 0.9918533604887984, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008146639511201629 }, { "epoch": 1.0388962765957448, "grad_norm": 66.14419189038723, "learning_rate": 1.7940060003110311e-07, "loss": 0.3707, "step": 6250, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.5454545454545454, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.46, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9375, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6796847433612139, "success_rate.epoch.global": 0.8337078651685393, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9970873257839721, "tokens_p.mean_in_band": 0.6041666666666666, "tokens_rate.above_band": 0.9795221843003413, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020477815699658702 }, { "epoch": 1.0397273936170213, "grad_norm": 95.2713935391227, "learning_rate": 1.7937396855114691e-07, "loss": 0.4349, "step": 6255, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.46153846153846156, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.46, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9385113268608414, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6704731036884092, "success_rate.epoch.global": 0.832967032967033, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975961538461539, "tokens_p.mean_in_band": 0.7001953125, "tokens_rate.above_band": 0.9896193771626297, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010380622837370242 }, { "epoch": 1.040558510638298, "grad_norm": 75.09997445156134, "learning_rate": 1.793473381822432e-07, "loss": 0.3879, "step": 6260, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.46153846153846156, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.05555555555555555, "success_rate.epoch.env.science": 0.9391025641025641, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6666836250169583, "success_rate.epoch.global": 0.8297413793103449, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9975609756097561, "tokens_p.mean_in_band": 0.5548023897058824, "tokens_rate.above_band": 0.9414466130884042, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05855338691159587 }, { "epoch": 1.0413896276595744, "grad_norm": 89.63266635755804, "learning_rate": 1.7932070897165428e-07, "loss": 0.5164, "step": 6265, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.4444444444444444, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.047619047619047616, "success_rate.epoch.env.science": 0.9365079365079365, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6604395604395604, "success_rate.epoch.global": 0.8185654008438819, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.3, "tokens_p.mean_above_band": 0.9976211776187378, "tokens_p.mean_in_band": 0.5923295454545454, "tokens_rate.above_band": 0.9522924411400248, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04770755885997522 }, { "epoch": 1.0422207446808511, "grad_norm": 81.12761027992248, "learning_rate": 1.7929408096664053e-07, "loss": 0.3965, "step": 6270, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.4444444444444444, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.047619047619047616, "success_rate.epoch.env.science": 0.934984520123839, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6644369586191051, "success_rate.epoch.global": 0.8198757763975155, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942819148936171, "tokens_p.mean_below_band": 6.693881005048752e-10, "tokens_p.mean_in_band": 0.7464192708333334, "tokens_rate.above_band": 0.9730848861283644, "tokens_rate.below_band": 0.002070393374741201, "tokens_rate.in_band": 0.024844720496894408 }, { "epoch": 1.0430518617021276, "grad_norm": 79.21504409639203, "learning_rate": 1.7926745421446013e-07, "loss": 0.3663, "step": 6275, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.4642857142857143, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.047619047619047616, "success_rate.epoch.env.science": 0.9335347432024169, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6664804578324216, "success_rate.epoch.global": 0.821501014198783, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9934895833333334, "tokens_p.mean_below_band": 3.694822225952521e-13, "tokens_p.mean_in_band": 0.826171875, "tokens_rate.above_band": 0.9876543209876543, "tokens_rate.below_band": 0.00411522633744856, "tokens_rate.in_band": 0.00823045267489712 }, { "epoch": 1.0438829787234043, "grad_norm": 307.2952993639155, "learning_rate": 1.79240828762369e-07, "loss": 0.4458, "step": 6280, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47058823529411764, "success_rate.epoch.env.agentgym:textcraft": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.47368421052631576, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.045454545454545456, "success_rate.epoch.env.science": 0.9317507418397626, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6818735942777062, "success_rate.epoch.global": 0.8210735586481114, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.992853139013453, "tokens_p.mean_in_band": 0.6858974358974359, "tokens_rate.above_band": 0.9195876288659793, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08041237113402062 }, { "epoch": 1.0447140957446808, "grad_norm": 60.99663879620861, "learning_rate": 1.7921420465762087e-07, "loss": 0.463, "step": 6285, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47058823529411764, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.4915254237288136, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.045454545454545456, "success_rate.epoch.env.science": 0.9327485380116959, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6284112619859764, "success_rate.epoch.global": 0.8226120857699805, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941683569979716, "tokens_p.mean_in_band": 0.700218023255814, "tokens_rate.above_band": 0.9197761194029851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08022388059701492 }, { "epoch": 1.0455452127659575, "grad_norm": 47.09905959276588, "learning_rate": 1.7918758194746696e-07, "loss": 0.4526, "step": 6290, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47058823529411764, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.5081967213114754, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.043478260869565216, "success_rate.epoch.env.science": 0.9310344827586207, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.629853590624266, "success_rate.epoch.global": 0.8221797323135756, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9917332848837209, "tokens_p.mean_below_band": 7.566995918750763e-10, "tokens_p.mean_in_band": 0.7288411458333334, "tokens_rate.above_band": 0.9322493224932249, "tokens_rate.below_band": 0.0027100271002710027, "tokens_rate.in_band": 0.06504065040650407 }, { "epoch": 1.046376329787234, "grad_norm": 51.89258922885982, "learning_rate": 1.7916096067915603e-07, "loss": 0.3259, "step": 6295, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47058823529411764, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5161290322580645, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9323943661971831, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6304303931679188, "success_rate.epoch.global": 0.8236397748592871, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9924469899169632, "tokens_p.mean_below_band": 5.327165126800537e-07, "tokens_p.mean_in_band": 0.5034611600449775, "tokens_rate.above_band": 0.8344469190794358, "tokens_rate.below_band": 0.0004949269982677555, "tokens_rate.in_band": 0.16505815392229647 }, { "epoch": 1.0472074468085106, "grad_norm": 59.66471598390848, "learning_rate": 1.7913434089993435e-07, "loss": 0.31, "step": 6300, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47058823529411764, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5238095238095238, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.9279778393351801, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6261898813080172, "success_rate.epoch.global": 0.8195211786372008, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.993120884289746, "tokens_p.mean_in_band": 0.6705643503289473, "tokens_rate.above_band": 0.8748971193415638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12510288065843622 }, { "epoch": 1.0480385638297873, "grad_norm": 77.71552600905194, "learning_rate": 1.791077226570455e-07, "loss": 0.3766, "step": 6305, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5303030303030303, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.08, "success_rate.epoch.env.science": 0.9291553133514986, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6241373468258118, "success_rate.epoch.global": 0.8191681735985533, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967592592592592, "tokens_p.mean_in_band": 0.5816200657894737, "tokens_rate.above_band": 0.9827115559599636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017288444040036398 }, { "epoch": 1.0488696808510638, "grad_norm": 39.880896829744565, "learning_rate": 1.7908110599773035e-07, "loss": 0.4901, "step": 6310, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5373134328358209, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.93048128342246, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6247217301065705, "success_rate.epoch.global": 0.8206039076376554, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9918478260869565, "tokens_p.mean_in_band": 0.7622282608695652, "tokens_rate.above_band": 0.9230769230769231, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07692307692307693 }, { "epoch": 1.0497007978723405, "grad_norm": 32.31999004305457, "learning_rate": 1.7905449096922691e-07, "loss": 0.2728, "step": 6315, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5441176470588235, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9315789473684211, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6255997165697887, "success_rate.epoch.global": 0.8237347294938918, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9911202185792349, "tokens_p.mean_in_band": 0.82578125, "tokens_rate.above_band": 0.973404255319149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026595744680851064 }, { "epoch": 1.050531914893617, "grad_norm": 56.92973727992401, "learning_rate": 1.790278776187704e-07, "loss": 0.2816, "step": 6320, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5362318840579711, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9305912596401028, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6246137775998809, "success_rate.epoch.global": 0.823327615780446, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963037634408602, "tokens_p.mean_in_band": 0.6315569196428571, "tokens_rate.above_band": 0.9587628865979382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041237113402061855 }, { "epoch": 1.0513630319148937, "grad_norm": 57.56350150387045, "learning_rate": 1.790012659935929e-07, "loss": 0.2943, "step": 6325, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5362318840579711, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9321608040201005, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6247881714198806, "success_rate.epoch.global": 0.8263069139966274, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.98974609375, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9770992366412213, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022900763358778626 }, { "epoch": 1.0521941489361701, "grad_norm": 108.81535078273247, "learning_rate": 1.7897465614092362e-07, "loss": 0.3824, "step": 6330, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9328358208955224, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6270102479057703, "success_rate.epoch.global": 0.8289036544850499, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962201873935264, "tokens_p.mean_in_band": 0.6884765625, "tokens_rate.above_band": 0.9966044142614601, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003395585738539898 }, { "epoch": 1.0530252659574468, "grad_norm": 1050.5492452358776, "learning_rate": 1.7894804810798848e-07, "loss": 0.3721, "step": 6335, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5466666666666666, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9339853300733496, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6261503168267635, "success_rate.epoch.global": 0.8284313725490197, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963932181971357, "tokens_p.mean_in_band": 0.512890625, "tokens_rate.above_band": 0.967400162999185, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032599837000814993 }, { "epoch": 1.0538563829787233, "grad_norm": 90.25205218720191, "learning_rate": 1.7892144194201025e-07, "loss": 0.3748, "step": 6340, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5454545454545454, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9346246973365617, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5634780097491962, "success_rate.epoch.global": 0.8276972624798712, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9962559117183395, "tokens_p.mean_below_band": 5.7980287238024175e-12, "tokens_p.mean_in_band": 0.602156929347826, "tokens_rate.above_band": 0.9758974358974359, "tokens_rate.below_band": 0.0005128205128205128, "tokens_rate.in_band": 0.02358974358974359 }, { "epoch": 1.0546875, "grad_norm": 110.66002278027692, "learning_rate": 1.7889483769020832e-07, "loss": 0.3198, "step": 6345, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5487804878048781, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9328537170263789, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5636335059532112, "success_rate.epoch.global": 0.8256735340729001, "success_rate.window.env.logic": 0.6, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.7833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9983033749082906, "tokens_p.mean_below_band": 1.2514647096395493e-09, "tokens_p.mean_in_band": 0.6125, "tokens_rate.above_band": 0.9777618364418939, "tokens_rate.below_band": 0.0007173601147776184, "tokens_rate.in_band": 0.021520803443328552 }, { "epoch": 1.0555186170212767, "grad_norm": 129.08926692631837, "learning_rate": 1.7886823539979875e-07, "loss": 0.2167, "step": 6350, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5487804878048781, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9314420803782506, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5634923422883983, "success_rate.epoch.global": 0.8268330733229329, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9166666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.987037037037037, "tokens_p.mean_in_band": 0.74765625, "tokens_rate.above_band": 0.9310344827586207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06896551724137931 }, { "epoch": 1.0563497340425532, "grad_norm": 67.39747282118377, "learning_rate": 1.7884163511799405e-07, "loss": 0.3422, "step": 6355, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5487804878048781, "success_rate.epoch.env.math": 1.0, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.9327146171693735, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5670383993863141, "success_rate.epoch.global": 0.8294930875576036, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.985479797979798, "tokens_p.mean_in_band": 0.826171875, "tokens_rate.above_band": 0.9705882352941176, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029411764705882353 }, { "epoch": 1.0571808510638299, "grad_norm": 41.85017480121167, "learning_rate": 1.7881503689200325e-07, "loss": 0.3681, "step": 6360, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5487804878048781, "success_rate.epoch.env.math": 0.9861111111111112, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.9269406392694064, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5650721127074284, "success_rate.epoch.global": 0.8260211800302572, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9877717391304348, "tokens_p.mean_below_band": 1.0878364875566149e-10, "tokens_p.mean_in_band": 0.6533954326923077, "tokens_rate.above_band": 0.8313253012048193, "tokens_rate.below_band": 0.012048192771084338, "tokens_rate.in_band": 0.1566265060240964 }, { "epoch": 1.0580119680851063, "grad_norm": 66.71791944353244, "learning_rate": 1.7878844076903165e-07, "loss": 0.2858, "step": 6365, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5882352941176471, "success_rate.epoch.env.logic": 0.5476190476190477, "success_rate.epoch.env.math": 0.9861111111111112, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.9255079006772009, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5703028909080563, "success_rate.epoch.global": 0.8253731343283582, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.825, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.991164353859497, "tokens_p.mean_in_band": 0.5632560483870968, "tokens_rate.above_band": 0.9738175675675675, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026182432432432432 }, { "epoch": 1.058843085106383, "grad_norm": 59.06345597476102, "learning_rate": 1.787618467962809e-07, "loss": 0.4183, "step": 6370, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.42105263157894735, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5882352941176471, "success_rate.epoch.env.logic": 0.5402298850574713, "success_rate.epoch.env.math": 0.9861111111111112, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.9220489977728286, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5668789030749116, "success_rate.epoch.global": 0.8205882352941176, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.996594982078853, "tokens_p.mean_in_band": 0.5979073660714286, "tokens_rate.above_band": 0.9522184300341296, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04778156996587031 }, { "epoch": 1.0596742021276595, "grad_norm": 368.11241159578213, "learning_rate": 1.7873525502094873e-07, "loss": 0.2308, "step": 6375, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.42105263157894735, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5882352941176471, "success_rate.epoch.env.logic": 0.5454545454545454, "success_rate.epoch.env.math": 0.9861111111111112, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.9234135667396062, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5671410006144715, "success_rate.epoch.global": 0.8217391304347826, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963835311572701, "tokens_p.mean_in_band": 0.6975911458333334, "tokens_rate.above_band": 0.9656160458452722, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034383954154727794 }, { "epoch": 1.0605053191489362, "grad_norm": 88.12338112833083, "learning_rate": 1.7870866549022904e-07, "loss": 0.3233, "step": 6380, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.42105263157894735, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5882352941176471, "success_rate.epoch.env.logic": 0.5454545454545454, "success_rate.epoch.env.math": 0.9864864864864865, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.9225806451612903, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5670952459941774, "success_rate.epoch.global": 0.8228571428571428, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.985248447204969, "tokens_p.mean_below_band": 2.9976945370435715e-09, "tokens_p.mean_in_band": 0.787109375, "tokens_rate.above_band": 0.9583333333333334, "tokens_rate.below_band": 0.005952380952380952, "tokens_rate.in_band": 0.03571428571428571 }, { "epoch": 1.0613364361702127, "grad_norm": 1415.1440546492604, "learning_rate": 1.7868207825131177e-07, "loss": 0.4223, "step": 6385, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47619047619047616, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5555555555555556, "success_rate.epoch.env.logic": 0.5454545454545454, "success_rate.epoch.env.math": 0.9866666666666667, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.9234042553191489, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5694414356329249, "success_rate.epoch.global": 0.8236953455571228, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9874859392575928, "tokens_p.mean_in_band": 0.47511691560324826, "tokens_rate.above_band": 0.6734848484848485, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.32651515151515154 }, { "epoch": 1.0621675531914894, "grad_norm": 77.17081729545556, "learning_rate": 1.786554933513827e-07, "loss": 0.249, "step": 6390, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47619047619047616, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5555555555555556, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.9868421052631579, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9242105263157895, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5701802494742603, "success_rate.epoch.global": 0.8245125348189415, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953703703703703, "tokens_p.mean_in_band": 0.7235753676470589, "tokens_rate.above_band": 0.945859872611465, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054140127388535034 }, { "epoch": 1.062998670212766, "grad_norm": 54.746022349568634, "learning_rate": 1.7862891083762354e-07, "loss": 0.3422, "step": 6395, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47619047619047616, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5555555555555556, "success_rate.epoch.env.logic": 0.5494505494505495, "success_rate.epoch.env.math": 0.9873417721518988, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9251559251559252, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5697142554366474, "success_rate.epoch.global": 0.8255494505494505, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9997550940438872, "tokens_p.mean_in_band": 0.4545641447368421, "tokens_rate.above_band": 0.9710806697108066, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0289193302891933 }, { "epoch": 1.0638297872340425, "grad_norm": 691.6460849750772, "learning_rate": 1.7860233075721175e-07, "loss": 0.2917, "step": 6400, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5555555555555556, "success_rate.epoch.env.logic": 0.5483870967741935, "success_rate.epoch.env.math": 0.9875, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9260780287474333, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5720968956939252, "success_rate.epoch.global": 0.8265582655826558, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971846846846847, "tokens_p.mean_in_band": 0.6029947916666667, "tokens_rate.above_band": 0.9866666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013333333333333334 }, { "epoch": 1.0646609042553192, "grad_norm": 84.08086629675827, "learning_rate": 1.7857575315732046e-07, "loss": 0.3328, "step": 6405, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5789473684210527, "success_rate.epoch.env.logic": 0.5425531914893617, "success_rate.epoch.env.math": 0.9875, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9271255060728745, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5739574341845357, "success_rate.epoch.global": 0.8273092369477911, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954007056451613, "tokens_p.mean_in_band": 0.6981336805555556, "tokens_rate.above_band": 0.9821782178217822, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01782178217821782 }, { "epoch": 1.0654920212765957, "grad_norm": 580.8719720104739, "learning_rate": 1.7854917808511837e-07, "loss": 0.4677, "step": 6410, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5789473684210527, "success_rate.epoch.env.logic": 0.5473684210526316, "success_rate.epoch.env.math": 0.9875, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9243027888446215, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5738118578318305, "success_rate.epoch.global": 0.8256274768824307, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9911629098360656, "tokens_p.mean_below_band": 5.502442945726216e-11, "tokens_p.mean_in_band": 0.6705729166666666, "tokens_rate.above_band": 0.8591549295774648, "tokens_rate.below_band": 0.0035211267605633804, "tokens_rate.in_band": 0.13732394366197184 }, { "epoch": 1.0663231382978724, "grad_norm": 50.41168610189158, "learning_rate": 1.7852260558776978e-07, "loss": 0.3104, "step": 6415, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.55, "success_rate.epoch.env.logic": 0.5416666666666666, "success_rate.epoch.env.math": 0.9875, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.9250493096646942, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5629215976331361, "success_rate.epoch.global": 0.8226857887874837, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9934703712517848, "tokens_p.mean_in_band": 0.537900967413442, "tokens_rate.above_band": 0.8953760920519923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10462390794800767 }, { "epoch": 1.0671542553191489, "grad_norm": 69.21058405606163, "learning_rate": 1.784960357124343e-07, "loss": 0.3487, "step": 6420, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.55, "success_rate.epoch.env.logic": 0.5510204081632653, "success_rate.epoch.env.math": 0.9875, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.9259259259259259, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5710874905517762, "success_rate.epoch.global": 0.8247422680412371, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933928571428572, "tokens_p.mean_in_band": 0.6655815972222222, "tokens_rate.above_band": 0.9510869565217391, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04891304347826087 }, { "epoch": 1.0679853723404256, "grad_norm": 71.70631679231734, "learning_rate": 1.7846946850626697e-07, "loss": 0.3446, "step": 6425, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.9876543209876543, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.9267822736030829, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5666420721574864, "success_rate.epoch.global": 0.8256997455470738, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9885817307692307, "tokens_p.mean_below_band": 8.307397365570068e-07, "tokens_p.mean_in_band": 0.24333618936426635, "tokens_rate.above_band": 0.22934076137418755, "tokens_rate.below_band": 0.00023212627669452182, "tokens_rate.in_band": 0.7704271123491179 }, { "epoch": 1.068816489361702, "grad_norm": 24.557658215998572, "learning_rate": 1.784429040164181e-07, "loss": 0.2392, "step": 6430, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4782608695652174, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.9759036144578314, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.9276190476190476, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5629979779838343, "success_rate.epoch.global": 0.8241206030150754, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9966643258426966, "tokens_p.mean_below_band": 6.314393452555578e-16, "tokens_p.mean_in_band": 0.6741071428571429, "tokens_rate.above_band": 0.9595687331536388, "tokens_rate.below_band": 0.0026954177897574125, "tokens_rate.in_band": 0.03773584905660377 }, { "epoch": 1.0696476063829787, "grad_norm": 102.28341051783671, "learning_rate": 1.7841634229003313e-07, "loss": 0.5238, "step": 6435, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4782608695652174, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.9761904761904762, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.9287054409005628, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.568690859040806, "success_rate.epoch.global": 0.826302729528536, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9941939890710383, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9945652173913043, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005434782608695652 }, { "epoch": 1.0704787234042552, "grad_norm": 98.75339140149212, "learning_rate": 1.7838978337425262e-07, "loss": 0.3406, "step": 6440, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4782608695652174, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.9770114942528736, "success_rate.epoch.env.sat": 0.12121212121212122, "success_rate.epoch.env.science": 0.9277777777777778, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5686801945347671, "success_rate.epoch.global": 0.8272058823529411, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9868881118881119, "tokens_p.mean_below_band": 5.3085386753082275e-08, "tokens_p.mean_in_band": 0.8541666666666666, "tokens_rate.above_band": 0.9727891156462585, "tokens_rate.below_band": 0.006802721088435374, "tokens_rate.in_band": 0.02040816326530612 }, { "epoch": 1.071309840425532, "grad_norm": 50.849622236817034, "learning_rate": 1.7836322731621209e-07, "loss": 0.4295, "step": 6445, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.52, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.9772727272727273, "success_rate.epoch.env.sat": 0.14705882352941177, "success_rate.epoch.env.science": 0.9285714285714286, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.575544266191325, "success_rate.epoch.global": 0.8292978208232445, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9977713178294574, "tokens_p.mean_in_band": 0.6614583333333334, "tokens_rate.above_band": 0.9862385321100917, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013761467889908258 }, { "epoch": 1.0721409574468086, "grad_norm": 55.12315168420935, "learning_rate": 1.7833667416304212e-07, "loss": 0.286, "step": 6450, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5643564356435643, "success_rate.epoch.env.math": 0.9775280898876404, "success_rate.epoch.env.sat": 0.14705882352941177, "success_rate.epoch.env.science": 0.9274047186932849, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.5743332194738028, "success_rate.epoch.global": 0.8287425149700599, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9966887417218543, "tokens_p.mean_below_band": 6.927791673660977e-13, "tokens_p.mean_in_band": 0.6494140625, "tokens_rate.above_band": 0.993421052631579, "tokens_rate.below_band": 0.0021929824561403508, "tokens_rate.in_band": 0.0043859649122807015 }, { "epoch": 1.072972074468085, "grad_norm": 287.51365033763386, "learning_rate": 1.7831012396186802e-07, "loss": 0.3523, "step": 6455, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5686274509803921, "success_rate.epoch.env.math": 0.978021978021978, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.9279279279279279, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6131289660701426, "success_rate.epoch.global": 0.8293838862559242, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978508364312267, "tokens_p.mean_in_band": 0.5879836309523809, "tokens_rate.above_band": 0.962432915921288, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03756708407871199 }, { "epoch": 1.0738031914893618, "grad_norm": 1.863389303347459, "learning_rate": 1.7828357675980987e-07, "loss": 0.2139, "step": 6460, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5909090909090909, "success_rate.epoch.env.logic": 0.5686274509803921, "success_rate.epoch.env.math": 0.978021978021978, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.9273049645390071, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6148432893511969, "success_rate.epoch.global": 0.8302107728337237, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9918650793650794, "tokens_p.mean_below_band": 2.2851054382044822e-11, "tokens_p.mean_in_band": 0.8486328125, "tokens_rate.above_band": 0.984375, "tokens_rate.below_band": 0.003125, "tokens_rate.in_band": 0.0125 }, { "epoch": 1.0746343085106382, "grad_norm": 46.544767362918385, "learning_rate": 1.7825703260398247e-07, "loss": 0.2961, "step": 6465, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5909090909090909, "success_rate.epoch.env.logic": 0.5728155339805825, "success_rate.epoch.env.math": 0.9782608695652174, "success_rate.epoch.env.sat": 0.14285714285714285, "success_rate.epoch.env.science": 0.9279437609841827, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6153038139865248, "success_rate.epoch.global": 0.8315911730545877, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9898897058823529, "tokens_p.mean_below_band": 6.314393452555578e-16, "tokens_p.mean_in_band": 0.8684895833333334, "tokens_rate.above_band": 0.966824644549763, "tokens_rate.below_band": 0.004739336492890996, "tokens_rate.in_band": 0.02843601895734597 }, { "epoch": 1.075465425531915, "grad_norm": 51.803318841664115, "learning_rate": 1.782304915414952e-07, "loss": 0.3279, "step": 6470, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5909090909090909, "success_rate.epoch.env.logic": 0.5673076923076923, "success_rate.epoch.env.math": 0.978494623655914, "success_rate.epoch.env.sat": 0.1388888888888889, "success_rate.epoch.env.science": 0.926829268292683, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6143622836008931, "success_rate.epoch.global": 0.8296892980437284, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9945089285714286, "tokens_p.mean_in_band": 0.6749526515151515, "tokens_rate.above_band": 0.9549795361527967, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045020463847203276 }, { "epoch": 1.0762965425531914, "grad_norm": 110.85756928903345, "learning_rate": 1.7820395361945194e-07, "loss": 0.396, "step": 6475, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.5, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5909090909090909, "success_rate.epoch.env.logic": 0.5660377358490566, "success_rate.epoch.env.math": 0.9787234042553191, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.9258620689655173, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6138384536972431, "success_rate.epoch.global": 0.8282138794084186, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9948894101876675, "tokens_p.mean_in_band": 0.6570638020833334, "tokens_rate.above_band": 0.9395465994962217, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.060453400503778336 }, { "epoch": 1.077127659574468, "grad_norm": 513.1445763710295, "learning_rate": 1.7817741888495096e-07, "loss": 0.4539, "step": 6480, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.48148148148148145, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6086956521739131, "success_rate.epoch.env.logic": 0.5596330275229358, "success_rate.epoch.env.math": 0.9787234042553191, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.9264957264957265, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6132472711472787, "success_rate.epoch.global": 0.8267716535433071, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9951459560792716, "tokens_p.mean_in_band": 0.7315705128205128, "tokens_rate.above_band": 0.9598971722365038, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04010282776349614 }, { "epoch": 1.0779587765957448, "grad_norm": 145.08457463701905, "learning_rate": 1.7815088738508498e-07, "loss": 0.3923, "step": 6485, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4827586206896552, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6086956521739131, "success_rate.epoch.env.logic": 0.5596330275229358, "success_rate.epoch.env.math": 0.979381443298969, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.9271186440677966, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6134798253130872, "success_rate.epoch.global": 0.8275862068965517, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998003992015968, "tokens_p.mean_in_band": 0.719140625, "tokens_rate.above_band": 0.9901185770750988, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009881422924901186 }, { "epoch": 1.0787898936170213, "grad_norm": 12.707820758060128, "learning_rate": 1.7812435916694087e-07, "loss": 0.2818, "step": 6490, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4827586206896552, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5833333333333334, "success_rate.epoch.env.logic": 0.5585585585585585, "success_rate.epoch.env.math": 0.9795918367346939, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.9278523489932886, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6111623080909291, "success_rate.epoch.global": 0.8272827282728272, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952233895174325, "tokens_p.mean_in_band": 0.5006917317708334, "tokens_rate.above_band": 0.9575502984744638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04244970152553615 }, { "epoch": 1.079621010638298, "grad_norm": 92.89104092271577, "learning_rate": 1.7809783427759971e-07, "loss": 0.3989, "step": 6495, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6, "success_rate.epoch.env.logic": 0.5585585585585585, "success_rate.epoch.env.math": 0.9795918367346939, "success_rate.epoch.env.sat": 0.13513513513513514, "success_rate.epoch.env.science": 0.9269102990033222, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6262804289382362, "success_rate.epoch.global": 0.826797385620915, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953051643192489, "tokens_p.mean_in_band": 0.59912109375, "tokens_rate.above_band": 0.9953271028037384, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004672897196261682 }, { "epoch": 1.0804521276595744, "grad_norm": 53.10069115599052, "learning_rate": 1.7807131276413672e-07, "loss": 0.3364, "step": 6500, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6, "success_rate.epoch.env.logic": 0.5585585585585585, "success_rate.epoch.env.math": 0.9803921568627451, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.9275123558484349, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6260846279570044, "success_rate.epoch.global": 0.8275862068965517, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9896472392638037, "tokens_p.mean_in_band": 0.5596217105263158, "tokens_rate.above_band": 0.8956043956043956, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1043956043956044 }, { "epoch": 1.0812832446808511, "grad_norm": 191.37010940700353, "learning_rate": 1.7804479467362105e-07, "loss": 0.2535, "step": 6505, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4666666666666667, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.6153846153846154, "success_rate.epoch.env.logic": 0.5585585585585585, "success_rate.epoch.env.math": 0.9809523809523809, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.9263502454991817, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6274285124229133, "success_rate.epoch.global": 0.8279914529914529, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9847248134328358, "tokens_p.mean_in_band": 0.7765066964285714, "tokens_rate.above_band": 0.950354609929078, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04964539007092199 }, { "epoch": 1.0821143617021276, "grad_norm": 46.93725625490084, "learning_rate": 1.7801828005311586e-07, "loss": 0.4796, "step": 6510, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5925925925925926, "success_rate.epoch.env.logic": 0.5585585585585585, "success_rate.epoch.env.math": 0.9813084112149533, "success_rate.epoch.env.sat": 0.13157894736842105, "success_rate.epoch.env.science": 0.9270664505672609, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6240854623408922, "success_rate.epoch.global": 0.8276955602536998, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9941420014094433, "tokens_p.mean_below_band": 6.780028343200684e-07, "tokens_p.mean_in_band": 0.3543172554347826, "tokens_rate.above_band": 0.7109218436873748, "tokens_rate.below_band": 0.001002004008016032, "tokens_rate.in_band": 0.2880761523046092 }, { "epoch": 1.0829454787234043, "grad_norm": 105.94141958340671, "learning_rate": 1.7799176894967804e-07, "loss": 0.333, "step": 6515, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5663716814159292, "success_rate.epoch.env.math": 0.9814814814814815, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.927536231884058, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6223320992416426, "success_rate.epoch.global": 0.8263598326359832, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9935429676893837, "tokens_p.mean_below_band": 7.338821887969971e-07, "tokens_p.mean_in_band": 0.5341239429175476, "tokens_rate.above_band": 0.8859754630743325, "tokens_rate.below_band": 0.00024055809477988935, "tokens_rate.in_band": 0.11378397883088766 }, { "epoch": 1.0837765957446808, "grad_norm": 174.17597693767556, "learning_rate": 1.7796526141035836e-07, "loss": 0.3843, "step": 6520, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5739130434782609, "success_rate.epoch.env.math": 0.9814814814814815, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.9267515923566879, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.622946346744821, "success_rate.epoch.global": 0.8269430051813471, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9924380466472303, "tokens_p.mean_below_band": 1.6916601452976465e-10, "tokens_p.mean_in_band": 0.8662683823529411, "tokens_rate.above_band": 0.9501385041551247, "tokens_rate.below_band": 0.002770083102493075, "tokens_rate.in_band": 0.04709141274238227 }, { "epoch": 1.0846077127659575, "grad_norm": 89.97509558787614, "learning_rate": 1.7793875748220117e-07, "loss": 0.3639, "step": 6525, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5641025641025641, "success_rate.epoch.env.math": 0.9814814814814815, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.925984251968504, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6219847267662864, "success_rate.epoch.global": 0.8254620123203286, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.2857142857142857, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9958263888888889, "tokens_p.mean_below_band": 7.82310962677002e-07, "tokens_p.mean_in_band": 0.5351166328600405, "tokens_rate.above_band": 0.9010812975570685, "tokens_rate.below_band": 0.00020024028834601522, "tokens_rate.in_band": 0.0987184621545855 }, { "epoch": 1.085438829787234, "grad_norm": 105.98294587238055, "learning_rate": 1.779122572122445e-07, "loss": 0.4785, "step": 6530, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.46875, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5677966101694916, "success_rate.epoch.env.math": 0.9814814814814815, "success_rate.epoch.env.sat": 0.125, "success_rate.epoch.env.science": 0.926791277258567, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6279922370004344, "success_rate.epoch.global": 0.8272357723577236, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9921153846153846, "tokens_p.mean_in_band": 0.7970252403846154, "tokens_rate.above_band": 0.9615384615384616, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038461538461538464 }, { "epoch": 1.0862699468085106, "grad_norm": 31.33804722027422, "learning_rate": 1.778857606475198e-07, "loss": 0.4783, "step": 6535, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.48484848484848486, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9814814814814815, "success_rate.epoch.env.sat": 0.12195121951219512, "success_rate.epoch.env.science": 0.9274691358024691, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6295703755607672, "success_rate.epoch.global": 0.8277945619335347, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9960848519362187, "tokens_p.mean_in_band": 0.58734375, "tokens_rate.above_band": 0.8977505112474438, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10224948875255624 }, { "epoch": 1.0871010638297873, "grad_norm": 73.4969904114429, "learning_rate": 1.7785926783505195e-07, "loss": 0.2282, "step": 6540, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.48484848484848486, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5714285714285714, "success_rate.epoch.env.logic": 0.575, "success_rate.epoch.env.math": 0.981651376146789, "success_rate.epoch.env.sat": 0.11904761904761904, "success_rate.epoch.env.science": 0.9282442748091603, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6297169993588446, "success_rate.epoch.global": 0.8285144566301097, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940277777777777, "tokens_p.mean_in_band": 0.6640625, "tokens_rate.above_band": 0.9183673469387755, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08163265306122448 }, { "epoch": 1.0879321808510638, "grad_norm": 89.49092934806677, "learning_rate": 1.7783277882185925e-07, "loss": 0.3395, "step": 6545, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.48484848484848486, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5772357723577236, "success_rate.epoch.env.math": 0.981651376146789, "success_rate.epoch.env.sat": 0.11904761904761904, "success_rate.epoch.env.science": 0.9288956127080181, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.628188151791485, "success_rate.epoch.global": 0.8282329713721619, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9958591811414392, "tokens_p.mean_below_band": 4.4330954551696777e-07, "tokens_p.mean_in_band": 0.5228365384615384, "tokens_rate.above_band": 0.8904109589041096, "tokens_rate.below_band": 0.0004418912947414936, "tokens_rate.in_band": 0.10914714980114892 }, { "epoch": 1.0887632978723405, "grad_norm": 100.66617837587154, "learning_rate": 1.7780629365495322e-07, "loss": 0.3668, "step": 6550, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47058823529411764, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5772357723577236, "success_rate.epoch.env.math": 0.981651376146789, "success_rate.epoch.env.sat": 0.11904761904761904, "success_rate.epoch.env.science": 0.9296407185628742, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6269595023642568, "success_rate.epoch.global": 0.8285994123408423, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9940181518151815, "tokens_p.mean_below_band": 1.8361788534093648e-08, "tokens_rate.above_band": 0.9934426229508196, "tokens_rate.below_band": 0.006557377049180328, "tokens_rate.in_band": 0.0 }, { "epoch": 1.089594414893617, "grad_norm": 33.818722055195266, "learning_rate": 1.777798123813385e-07, "loss": 0.3124, "step": 6555, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45714285714285713, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5725806451612904, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.11627906976744186, "success_rate.epoch.env.science": 0.9300595238095238, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6251450688746975, "success_rate.epoch.global": 0.8273520853540253, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975676121372031, "tokens_p.mean_in_band": 0.5640836148648649, "tokens_rate.above_band": 0.9534591194968554, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04654088050314465 }, { "epoch": 1.0904255319148937, "grad_norm": 68.87325475131094, "learning_rate": 1.7775333504801289e-07, "loss": 0.3797, "step": 6560, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.43243243243243246, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5634920634920635, "success_rate.epoch.env.math": 0.9823008849557522, "success_rate.epoch.env.sat": 0.11627906976744186, "success_rate.epoch.env.science": 0.9289940828402367, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6295656974017237, "success_rate.epoch.global": 0.8242074927953891, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9982399874843555, "tokens_p.mean_in_band": 0.5787550403225806, "tokens_rate.above_band": 0.9809699201964396, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019030079803560467 }, { "epoch": 1.0912566489361701, "grad_norm": 61.59442021774853, "learning_rate": 1.7772686170196716e-07, "loss": 0.3156, "step": 6565, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.43243243243243246, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5590551181102362, "success_rate.epoch.env.math": 0.9823008849557522, "success_rate.epoch.env.sat": 0.1111111111111111, "success_rate.epoch.env.science": 0.9296187683284457, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6287493138971829, "success_rate.epoch.global": 0.8228571428571428, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9955481843575419, "tokens_p.mean_in_band": 0.6449468085106383, "tokens_rate.above_band": 0.9384010484927916, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061598951507208385 }, { "epoch": 1.0920877659574468, "grad_norm": 130.11674023574747, "learning_rate": 1.77700392390185e-07, "loss": 0.3977, "step": 6570, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.425, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5590551181102362, "success_rate.epoch.env.math": 0.9823008849557522, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.9302325581395349, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6279098501191337, "success_rate.epoch.global": 0.8216981132075472, "success_rate.window.env.agentgym:alfworld": 0.3333333333333333, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9972098214285714, "tokens_p.mean_in_band": 0.68271484375, "tokens_rate.above_band": 0.9710982658959537, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028901734104046242 }, { "epoch": 1.0929188829787233, "grad_norm": 78.33227863288644, "learning_rate": 1.7767392715964298e-07, "loss": 0.3319, "step": 6575, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4146341463414634, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5581395348837209, "success_rate.epoch.env.math": 0.9826086956521739, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.9305354558610709, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6269397838948524, "success_rate.epoch.global": 0.8211610486891385, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9961391235480465, "tokens_p.mean_in_band": 0.6548887310606061, "tokens_rate.above_band": 0.9663265306122449, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0336734693877551 }, { "epoch": 1.09375, "grad_norm": 40.57218284438359, "learning_rate": 1.7764746605731038e-07, "loss": 0.4888, "step": 6580, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4146341463414634, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5581395348837209, "success_rate.epoch.env.math": 0.9826086956521739, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.9296987087517934, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6268637159758271, "success_rate.epoch.global": 0.8212290502793296, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.990530303030303, "tokens_p.mean_in_band": 0.7130681818181818, "tokens_rate.above_band": 0.8333333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16666666666666666 }, { "epoch": 1.0945811170212767, "grad_norm": 58.64597739572771, "learning_rate": 1.776210091301492e-07, "loss": 0.4177, "step": 6585, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4146341463414634, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5581395348837209, "success_rate.epoch.env.math": 0.9826086956521739, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.9292786421499293, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6268255281029305, "success_rate.epoch.global": 0.8219557195571956, "success_rate.window.env.science": 0.9, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9874543795620438, "tokens_p.mean_in_band": 0.6271033653846154, "tokens_rate.above_band": 0.8404907975460123, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15950920245398773 }, { "epoch": 1.0954122340425532, "grad_norm": 180.39151512763016, "learning_rate": 1.7759455642511398e-07, "loss": 0.2981, "step": 6590, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4146341463414634, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5517241379310345, "success_rate.epoch.env.logic": 0.5538461538461539, "success_rate.epoch.env.math": 0.9827586206896551, "success_rate.epoch.env.sat": 0.10869565217391304, "success_rate.epoch.env.science": 0.9297752808988764, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6264939992619177, "success_rate.epoch.global": 0.8221814848762603, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969713656387665, "tokens_p.mean_in_band": 0.75390625, "tokens_rate.above_band": 0.9869565217391304, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013043478260869565 }, { "epoch": 1.0962433510638299, "grad_norm": 81.61788518495, "learning_rate": 1.775681079891518e-07, "loss": 0.3315, "step": 6595, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4146341463414634, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.556390977443609, "success_rate.epoch.env.math": 0.9829059829059829, "success_rate.epoch.env.sat": 0.10638297872340426, "success_rate.epoch.env.science": 0.9301675977653632, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6248922742284687, "success_rate.epoch.global": 0.821071752951862, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.994911369193154, "tokens_p.mean_in_band": 0.5670432220039293, "tokens_rate.above_band": 0.8893237660360948, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11067623396390519 }, { "epoch": 1.0970744680851063, "grad_norm": 66.41083767815665, "learning_rate": 1.7754166386920214e-07, "loss": 0.3743, "step": 6600, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4146341463414634, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5597014925373134, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.10416666666666667, "success_rate.epoch.env.science": 0.9305555555555556, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6243082904031211, "success_rate.epoch.global": 0.8207207207207208, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5416666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9997384937238494, "tokens_p.mean_below_band": 1.2931877790833823e-12, "tokens_p.mean_in_band": 0.5482954545454546, "tokens_rate.above_band": 0.9689189189189189, "tokens_rate.below_band": 0.0013513513513513514, "tokens_rate.in_band": 0.02972972972972973 }, { "epoch": 1.097905585106383, "grad_norm": 60.52325472022653, "learning_rate": 1.7751522411219688e-07, "loss": 0.2481, "step": 6605, "success_rate.epoch.env.abd": 0.6363636363636364, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.9752066115702479, "success_rate.epoch.env.sat": 0.10416666666666667, "success_rate.epoch.env.science": 0.9297520661157025, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6262854340333679, "success_rate.epoch.global": 0.8196428571428571, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5666666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9991512345679012, "tokens_p.mean_below_band": 1.525040715932846e-08, "tokens_p.mean_in_band": 0.5091145833333334, "tokens_rate.above_band": 0.9842041312272175, "tokens_rate.below_band": 0.001215066828675577, "tokens_rate.in_band": 0.014580801944106925 }, { "epoch": 1.0987367021276595, "grad_norm": 84.84879206854862, "learning_rate": 1.7748878876506007e-07, "loss": 0.2854, "step": 6610, "success_rate.epoch.env.abd": 0.6363636363636364, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5333333333333333, "success_rate.epoch.env.logic": 0.5514705882352942, "success_rate.epoch.env.math": 0.975609756097561, "success_rate.epoch.env.sat": 0.10416666666666667, "success_rate.epoch.env.science": 0.9275956284153005, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6257546830794269, "success_rate.epoch.global": 0.8184233835252436, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9992070895522388, "tokens_p.mean_below_band": 1.4915713109076023e-10, "tokens_p.mean_in_band": 0.552801724137931, "tokens_rate.above_band": 0.9571428571428572, "tokens_rate.below_band": 0.0014285714285714286, "tokens_rate.in_band": 0.041428571428571426 }, { "epoch": 1.0995678191489362, "grad_norm": 307.9009780617586, "learning_rate": 1.7746235787470795e-07, "loss": 0.4203, "step": 6615, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5434782608695652, "success_rate.epoch.env.math": 0.9682539682539683, "success_rate.epoch.env.sat": 0.10204081632653061, "success_rate.epoch.env.science": 0.9277929155313351, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.625374869515276, "success_rate.epoch.global": 0.8156277436347673, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.994165868836764, "tokens_p.mean_below_band": 9.942959877662361e-08, "tokens_p.mean_in_band": 0.5082828177257525, "tokens_rate.above_band": 0.8744244453746337, "tokens_rate.below_band": 0.0004185851820845542, "tokens_rate.in_band": 0.1251569694432817 }, { "epoch": 1.1003989361702127, "grad_norm": 63.74198812391206, "learning_rate": 1.7743593148804894e-07, "loss": 0.2712, "step": 6620, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.0, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5467625899280576, "success_rate.epoch.env.math": 0.96875, "success_rate.epoch.env.sat": 0.10204081632653061, "success_rate.epoch.env.science": 0.927027027027027, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6256489124516592, "success_rate.epoch.global": 0.8162020905923345, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9901315789473685, "tokens_p.mean_below_band": 2.648448571562767e-09, "tokens_p.mean_in_band": 0.7890625, "tokens_rate.above_band": 0.9178743961352657, "tokens_rate.below_band": 0.004830917874396135, "tokens_rate.in_band": 0.07729468599033816 }, { "epoch": 1.1012300531914894, "grad_norm": 67.92180948075969, "learning_rate": 1.7740950965198335e-07, "loss": 0.3405, "step": 6625, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5454545454545454, "success_rate.epoch.env.math": 0.9615384615384616, "success_rate.epoch.env.sat": 0.10204081632653061, "success_rate.epoch.env.science": 0.927321668909825, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6703557359923635, "success_rate.epoch.global": 0.8151986183074266, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9990313653136531, "tokens_p.mean_below_band": 1.7497114868092467e-13, "tokens_p.mean_in_band": 0.5560897435897436, "tokens_rate.above_band": 0.9713261648745519, "tokens_rate.below_band": 0.0007168458781362007, "tokens_rate.in_band": 0.02795698924731183 }, { "epoch": 1.102061170212766, "grad_norm": 95.70924124375362, "learning_rate": 1.7738309241340352e-07, "loss": 0.3161, "step": 6630, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5486111111111112, "success_rate.epoch.env.math": 0.9615384615384616, "success_rate.epoch.env.sat": 0.10204081632653061, "success_rate.epoch.env.science": 0.928, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.67070436296934, "success_rate.epoch.global": 0.8164665523156089, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9894770408163265, "tokens_p.mean_in_band": 0.8190104166666666, "tokens_rate.above_band": 0.9849246231155779, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01507537688442211 }, { "epoch": 1.1028922872340425, "grad_norm": 83.48372274800138, "learning_rate": 1.773566798191935e-07, "loss": 0.425, "step": 6635, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.547945205479452, "success_rate.epoch.env.math": 0.9618320610687023, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.928476821192053, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6705283355842585, "success_rate.epoch.global": 0.8161702127659575, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961801040312094, "tokens_p.mean_in_band": 0.583984375, "tokens_rate.above_band": 0.9412484700122399, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0587515299877601 }, { "epoch": 1.1037234042553192, "grad_norm": 187.5878506578719, "learning_rate": 1.7733027191622926e-07, "loss": 0.3096, "step": 6640, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5540540540540541, "success_rate.epoch.env.math": 0.9548872180451128, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9289473684210526, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6704951131097142, "success_rate.epoch.global": 0.816722972972973, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7222222222222222, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983529819694869, "tokens_p.mean_below_band": 7.048583938740194e-11, "tokens_p.mean_in_band": 0.5127840909090909, "tokens_rate.above_band": 0.9836289222373806, "tokens_rate.below_band": 0.001364256480218281, "tokens_rate.in_band": 0.015006821282401092 }, { "epoch": 1.1045545212765957, "grad_norm": 78.48493565379526, "learning_rate": 1.773038687513783e-07, "loss": 0.3308, "step": 6645, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.75, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5540540540540541, "success_rate.epoch.env.math": 0.9558823529411765, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9295039164490861, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6706361751937231, "success_rate.epoch.global": 0.818105616093881, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9897629310344828, "tokens_p.mean_in_band": 0.83984375, "tokens_rate.above_band": 0.943089430894309, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056910569105691054 }, { "epoch": 1.1053856382978724, "grad_norm": 82.13432992845564, "learning_rate": 1.7727747037149975e-07, "loss": 0.2639, "step": 6650, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.40476190476190477, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5503355704697986, "success_rate.epoch.env.math": 0.9562043795620438, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9287564766839378, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6748049118547651, "success_rate.epoch.global": 0.8178036605657238, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9941502463054187, "tokens_p.mean_in_band": 0.5443816489361702, "tokens_rate.above_band": 0.9452852153667055, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05471478463329453 }, { "epoch": 1.1062167553191489, "grad_norm": 184.8600193428981, "learning_rate": 1.7725107682344434e-07, "loss": 0.2572, "step": 6655, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.3953488372093023, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5503355704697986, "success_rate.epoch.env.math": 0.9571428571428572, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.9291237113402062, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6737182292037038, "success_rate.epoch.global": 0.8168316831683168, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977882420091324, "tokens_p.mean_in_band": 0.6877872242647058, "tokens_rate.above_band": 0.9279661016949152, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07203389830508475 }, { "epoch": 1.1070478723404256, "grad_norm": 84.57176337533897, "learning_rate": 1.772246881540541e-07, "loss": 0.2634, "step": 6660, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.3953488372093023, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5503355704697986, "success_rate.epoch.env.math": 0.9577464788732394, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.929757343550447, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6738307068346695, "success_rate.epoch.global": 0.8181818181818182, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9863138686131386, "tokens_p.mean_in_band": 0.62890625, "tokens_rate.above_band": 0.9856115107913669, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014388489208633094 }, { "epoch": 1.107878989361702, "grad_norm": 86.256842118094, "learning_rate": 1.7719830441016246e-07, "loss": 0.5245, "step": 6665, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4090909090909091, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5533333333333333, "success_rate.epoch.env.math": 0.958041958041958, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.9289340101522843, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6753045232451874, "success_rate.epoch.global": 0.8185516680227828, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6599999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9988137603795967, "tokens_p.mean_in_band": 0.4981219951923077, "tokens_rate.above_band": 0.9418994413407821, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05810055865921788 }, { "epoch": 1.1087101063829787, "grad_norm": 59.28796511278705, "learning_rate": 1.771719256385941e-07, "loss": 0.3414, "step": 6670, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4222222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5533333333333333, "success_rate.epoch.env.math": 0.958041958041958, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.9297365119196989, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6765712336905264, "success_rate.epoch.global": 0.8200161420500404, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9939963054187192, "tokens_p.mean_in_band": 0.79052734375, "tokens_rate.above_band": 0.9806763285024155, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01932367149758454 }, { "epoch": 1.1095412234042552, "grad_norm": 30.41967006378997, "learning_rate": 1.771455518861649e-07, "loss": 0.5756, "step": 6675, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4222222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5562913907284768, "success_rate.epoch.env.math": 0.9583333333333334, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.9291044776119403, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6768091789976863, "success_rate.epoch.global": 0.8205128205128205, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942567567567567, "tokens_p.mean_in_band": 0.7315912246704102, "tokens_rate.above_band": 0.9585492227979274, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04145077720207254 }, { "epoch": 1.110372340425532, "grad_norm": 99.66065630389684, "learning_rate": 1.771191831996818e-07, "loss": 0.2703, "step": 6680, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4222222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.5526315789473685, "success_rate.epoch.env.math": 0.958904109589041, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.9294554455445545, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6765602637619785, "success_rate.epoch.global": 0.8207171314741036, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9936715070643642, "tokens_p.mean_below_band": 1.0477378964424133e-08, "tokens_p.mean_in_band": 0.6063368055555556, "tokens_rate.above_band": 0.9710365853658537, "tokens_rate.below_band": 0.001524390243902439, "tokens_rate.in_band": 0.027439024390243903 }, { "epoch": 1.1112034574468086, "grad_norm": 60.95736748176639, "learning_rate": 1.7709281962594272e-07, "loss": 0.2887, "step": 6685, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4222222222222222, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.551948051948052, "success_rate.epoch.env.math": 0.9594594594594594, "success_rate.epoch.env.sat": 0.09615384615384616, "success_rate.epoch.env.science": 0.9298029556650246, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6765802031248488, "success_rate.epoch.global": 0.8210609659540776, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978861043194784, "tokens_p.mean_in_band": 0.5142045454545454, "tokens_rate.above_band": 0.9653815892997639, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03461841070023604 }, { "epoch": 1.112034574468085, "grad_norm": 317.2967101021701, "learning_rate": 1.7706646121173662e-07, "loss": 0.5627, "step": 6690, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.53125, "success_rate.epoch.env.logic": 0.5414012738853503, "success_rate.epoch.env.math": 0.9594594594594594, "success_rate.epoch.env.sat": 0.09433962264150944, "success_rate.epoch.env.science": 0.9300613496932515, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6760201682370098, "success_rate.epoch.global": 0.8183962264150944, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9959561638591118, "tokens_p.mean_in_band": 0.6730891047297297, "tokens_rate.above_band": 0.9724497393894267, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027550260610573342 }, { "epoch": 1.1128656914893618, "grad_norm": 63.73480798988222, "learning_rate": 1.7704010800384317e-07, "loss": 0.373, "step": 6695, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.5408805031446541, "success_rate.epoch.env.math": 0.959731543624161, "success_rate.epoch.env.sat": 0.09433962264150944, "success_rate.epoch.env.science": 0.9305724725943971, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6773353483988003, "success_rate.epoch.global": 0.8190327613104524, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955694211409396, "tokens_p.mean_in_band": 0.75, "tokens_rate.above_band": 0.9770491803278688, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022950819672131147 }, { "epoch": 1.1136968085106382, "grad_norm": 321.201272462513, "learning_rate": 1.7701376004903286e-07, "loss": 0.2956, "step": 6700, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5454545454545454, "success_rate.epoch.env.logic": 0.5403726708074534, "success_rate.epoch.env.math": 0.96, "success_rate.epoch.env.sat": 0.09433962264150944, "success_rate.epoch.env.science": 0.9311594202898551, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6773669458291728, "success_rate.epoch.global": 0.8196594427244582, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953330893118595, "tokens_p.mean_in_band": 0.48674242424242425, "tokens_rate.above_band": 0.9539106145251397, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046089385474860335 }, { "epoch": 1.114527925531915, "grad_norm": 33.41784353791032, "learning_rate": 1.7698741739406688e-07, "loss": 0.28, "step": 6705, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5142857142857142, "success_rate.epoch.env.logic": 0.5403726708074534, "success_rate.epoch.env.math": 0.9536423841059603, "success_rate.epoch.env.sat": 0.09259259259259259, "success_rate.epoch.env.science": 0.9316546762589928, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6738416529980227, "success_rate.epoch.global": 0.8179723502304147, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9920530439249936, "tokens_p.mean_below_band": 8.195638656616211e-08, "tokens_p.mean_in_band": 0.5544701506955177, "tokens_rate.above_band": 0.8571114046675473, "tokens_rate.below_band": 0.00044033465433729633, "tokens_rate.in_band": 0.14244826067811536 }, { "epoch": 1.1153590425531914, "grad_norm": 127.26792432193703, "learning_rate": 1.7696108008569696e-07, "loss": 0.4732, "step": 6710, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5142857142857142, "success_rate.epoch.env.logic": 0.5403726708074534, "success_rate.epoch.env.math": 0.954248366013072, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.930952380952381, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6736798516268407, "success_rate.epoch.global": 0.8176964149504196, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9922794117647059, "tokens_p.mean_in_band": 0.5978190104166666, "tokens_rate.above_band": 0.8762886597938144, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12371134020618557 }, { "epoch": 1.116190159574468, "grad_norm": 129.62614352338232, "learning_rate": 1.7693474817066541e-07, "loss": 0.4258, "step": 6715, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5142857142857142, "success_rate.epoch.env.logic": 0.5432098765432098, "success_rate.epoch.env.math": 0.9545454545454546, "success_rate.epoch.env.sat": 0.08928571428571429, "success_rate.epoch.env.science": 0.9303423848878394, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6737617535886791, "success_rate.epoch.global": 0.817562452687358, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9890046296296297, "tokens_p.mean_in_band": 0.6699695121951219, "tokens_rate.above_band": 0.8404669260700389, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15953307392996108 }, { "epoch": 1.1170212765957448, "grad_norm": 69.63332027970154, "learning_rate": 1.7690842169570488e-07, "loss": 0.4512, "step": 6720, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.41304347826086957, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5142857142857142, "success_rate.epoch.env.logic": 0.5432098765432098, "success_rate.epoch.env.math": 0.9487179487179487, "success_rate.epoch.env.sat": 0.08928571428571429, "success_rate.epoch.env.science": 0.9309941520467836, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6732912318915371, "success_rate.epoch.global": 0.8181818181818182, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.984375, "tokens_p.mean_below_band": 3.688037395477295e-07, "tokens_p.mean_in_band": 0.7065972222222222, "tokens_rate.above_band": 0.8875739644970414, "tokens_rate.below_band": 0.005917159763313609, "tokens_rate.in_band": 0.10650887573964497 }, { "epoch": 1.1178523936170213, "grad_norm": 39.59744733736173, "learning_rate": 1.7688210070753845e-07, "loss": 0.3412, "step": 6725, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5460122699386503, "success_rate.epoch.env.math": 0.9487179487179487, "success_rate.epoch.env.sat": 0.08928571428571429, "success_rate.epoch.env.science": 0.9292343387470998, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6732226230226998, "success_rate.epoch.global": 0.8173005219985086, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.6785714285714286, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9921013570822731, "tokens_p.mean_in_band": 0.5661638166520979, "tokens_rate.above_band": 0.8607933803845218, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13920661961547823 }, { "epoch": 1.118683510638298, "grad_norm": 98.55493715977076, "learning_rate": 1.7685578525287943e-07, "loss": 0.2633, "step": 6730, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5460122699386503, "success_rate.epoch.env.math": 0.9493670886075949, "success_rate.epoch.env.sat": 0.08771929824561403, "success_rate.epoch.env.science": 0.9295612009237876, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6731689490250846, "success_rate.epoch.global": 0.8175074183976261, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980101302460203, "tokens_p.mean_in_band": 0.5940504807692307, "tokens_rate.above_band": 0.9300134589502019, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06998654104979811 }, { "epoch": 1.1195146276595744, "grad_norm": 152.38908670459642, "learning_rate": 1.7682947537843127e-07, "loss": 0.3353, "step": 6735, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5481927710843374, "success_rate.epoch.env.math": 0.9493670886075949, "success_rate.epoch.env.sat": 0.08771929824561403, "success_rate.epoch.env.science": 0.9298850574712644, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6746251191347826, "success_rate.epoch.global": 0.8178466076696165, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998750734861846, "tokens_p.mean_in_band": 0.6797572544642857, "tokens_rate.above_band": 0.989145183175034, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010854816824966078 }, { "epoch": 1.1203457446808511, "grad_norm": 39.96993542782013, "learning_rate": 1.7680317113088756e-07, "loss": 0.2907, "step": 6740, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5481927710843374, "success_rate.epoch.env.math": 0.94375, "success_rate.epoch.env.sat": 0.08620689655172414, "success_rate.epoch.env.science": 0.9304446978335233, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6740278600493984, "success_rate.epoch.global": 0.8177159590043924, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9905598958333334, "tokens_p.mean_below_band": 3.342393029015511e-11, "tokens_p.mean_in_band": 0.6711309523809523, "tokens_rate.above_band": 0.897196261682243, "tokens_rate.below_band": 0.004672897196261682, "tokens_rate.in_band": 0.09813084112149532 }, { "epoch": 1.1211768617021276, "grad_norm": 35.22668115796592, "learning_rate": 1.7677687255693193e-07, "loss": 0.461, "step": 6745, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5562130177514792, "success_rate.epoch.env.math": 0.9440993788819876, "success_rate.epoch.env.sat": 0.08620689655172414, "success_rate.epoch.env.science": 0.9308390022675737, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.659673065805445, "success_rate.epoch.global": 0.8183139534883721, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966302952503209, "tokens_p.mean_in_band": 0.7945667613636364, "tokens_rate.above_band": 0.9860759493670886, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013924050632911392 }, { "epoch": 1.1220079787234043, "grad_norm": 101.40195444705066, "learning_rate": 1.7675057970323786e-07, "loss": 0.282, "step": 6750, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5529411764705883, "success_rate.epoch.env.math": 0.9440993788819876, "success_rate.epoch.env.sat": 0.08620689655172414, "success_rate.epoch.env.science": 0.9302587176602924, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6593228725428839, "success_rate.epoch.global": 0.8179190751445087, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.4375, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.9962630792227205, "tokens_p.mean_below_band": 1.2278178473934531e-11, "tokens_p.mean_in_band": 0.6395474137931034, "tokens_rate.above_band": 0.9570815450643777, "tokens_rate.below_band": 0.001430615164520744, "tokens_rate.in_band": 0.04148783977110158 }, { "epoch": 1.1228390957446808, "grad_norm": 62.65569055804822, "learning_rate": 1.7672429261646874e-07, "loss": 0.2613, "step": 6755, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5523255813953488, "success_rate.epoch.env.math": 0.9440993788819876, "success_rate.epoch.env.sat": 0.0847457627118644, "success_rate.epoch.env.science": 0.9305711086226204, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6591624781835411, "success_rate.epoch.global": 0.8173975557153127, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9998337765957447, "tokens_p.mean_in_band": 0.7383897569444444, "tokens_rate.above_band": 0.9543147208121827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04568527918781726 }, { "epoch": 1.1236702127659575, "grad_norm": 25.068913038088763, "learning_rate": 1.7669801134327773e-07, "loss": 0.3015, "step": 6760, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5523255813953488, "success_rate.epoch.env.math": 0.9454545454545454, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9309576837416481, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6591924156665461, "success_rate.epoch.global": 0.8179871520342612, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948660714285714, "tokens_p.mean_in_band": 0.5250459558823529, "tokens_rate.above_band": 0.89171974522293, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10828025477707007 }, { "epoch": 1.124501329787234, "grad_norm": 99.21289280221266, "learning_rate": 1.7667173593030764e-07, "loss": 0.3883, "step": 6765, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5491329479768786, "success_rate.epoch.env.math": 0.9457831325301205, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9314159292035398, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6589737064955457, "success_rate.epoch.global": 0.8184397163120567, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9922843855693348, "tokens_p.mean_in_band": 0.6683238636363636, "tokens_rate.above_band": 0.9641304347826087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035869565217391305 }, { "epoch": 1.1253324468085106, "grad_norm": 30.03715969336156, "learning_rate": 1.7664546642419087e-07, "loss": 0.2374, "step": 6770, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.425531914893617, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5517241379310345, "success_rate.epoch.env.math": 0.9461077844311377, "success_rate.epoch.env.sat": 0.08196721311475409, "success_rate.epoch.env.science": 0.9308452250274424, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6590627080828636, "success_rate.epoch.global": 0.8183098591549296, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9915865384615384, "tokens_p.mean_in_band": 0.5603298611111112, "tokens_rate.above_band": 0.9059233449477352, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09407665505226481 }, { "epoch": 1.1261635638297873, "grad_norm": 133.4325439584598, "learning_rate": 1.7661920287154938e-07, "loss": 0.327, "step": 6775, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5517241379310345, "success_rate.epoch.env.math": 0.9467455621301775, "success_rate.epoch.env.sat": 0.08196721311475409, "success_rate.epoch.env.science": 0.9312977099236641, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6602498306011949, "success_rate.epoch.global": 0.8194541637508748, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962197580645161, "tokens_p.mean_in_band": 0.849609375, "tokens_rate.above_band": 0.992, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008 }, { "epoch": 1.1269946808510638, "grad_norm": 65.87937973731965, "learning_rate": 1.7659294531899452e-07, "loss": 0.3323, "step": 6780, "success_rate.epoch.env.abd": 0.6666666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5511363636363636, "success_rate.epoch.env.math": 0.9473684210526315, "success_rate.epoch.env.sat": 0.08196721311475409, "success_rate.epoch.env.science": 0.9315960912052117, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6602801456838613, "success_rate.epoch.global": 0.8197633959638135, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9927536231884058, "tokens_p.mean_in_band": 0.7765213815789473, "tokens_rate.above_band": 0.9355932203389831, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06440677966101695 }, { "epoch": 1.1278257978723405, "grad_norm": 391.60418009006275, "learning_rate": 1.7656669381312709e-07, "loss": 0.5243, "step": 6785, "success_rate.epoch.env.abd": 0.6153846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5511363636363636, "success_rate.epoch.env.math": 0.9473684210526315, "success_rate.epoch.env.sat": 0.08196721311475409, "success_rate.epoch.env.science": 0.9311087190527448, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6555738344625414, "success_rate.epoch.global": 0.8195020746887967, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9906754032258065, "tokens_p.mean_in_band": 0.30725400309917356, "tokens_rate.above_band": 0.6720867208672087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.32791327913279134 }, { "epoch": 1.128656914893617, "grad_norm": 40.94470397575009, "learning_rate": 1.7654044840053695e-07, "loss": 0.2547, "step": 6790, "success_rate.epoch.env.abd": 0.6153846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5135135135135135, "success_rate.epoch.env.logic": 0.5480225988700564, "success_rate.epoch.env.math": 0.9476744186046512, "success_rate.epoch.env.sat": 0.08196721311475409, "success_rate.epoch.env.science": 0.9305555555555556, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6552682953069526, "success_rate.epoch.global": 0.8192439862542955, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995751953125, "tokens_p.mean_below_band": 1.0277290130034089e-10, "tokens_p.mean_in_band": 0.6383634868421053, "tokens_rate.above_band": 0.9696969696969697, "tokens_rate.below_band": 0.0015151515151515152, "tokens_rate.in_band": 0.02878787878787879 }, { "epoch": 1.1294880319148937, "grad_norm": 28.70464866070665, "learning_rate": 1.7651420912780338e-07, "loss": 0.3405, "step": 6795, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5480225988700564, "success_rate.epoch.env.math": 0.9479768786127167, "success_rate.epoch.env.sat": 0.08064516129032258, "success_rate.epoch.env.science": 0.9288747346072187, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.649798298012929, "success_rate.epoch.global": 0.8170648464163822, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9929052638927673, "tokens_p.mean_below_band": 3.4051481634378433e-09, "tokens_p.mean_in_band": 0.45220144752714114, "tokens_rate.above_band": 0.8118340512355475, "tokens_rate.below_band": 0.00022670596236681024, "tokens_rate.in_band": 0.18793924280208568 }, { "epoch": 1.1303191489361701, "grad_norm": 47.05704692588445, "learning_rate": 1.764879760414946e-07, "loss": 0.3716, "step": 6800, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5480225988700564, "success_rate.epoch.env.math": 0.9485714285714286, "success_rate.epoch.env.sat": 0.08064516129032258, "success_rate.epoch.env.science": 0.9294736842105263, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6499067979731126, "success_rate.epoch.global": 0.8183050847457627, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9880208333333333, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.96, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04 }, { "epoch": 1.1311502659574468, "grad_norm": 38.91865692725673, "learning_rate": 1.7646174918816797e-07, "loss": 0.3599, "step": 6805, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.550561797752809, "success_rate.epoch.env.math": 0.949438202247191, "success_rate.epoch.env.sat": 0.08064516129032258, "success_rate.epoch.env.science": 0.9296956977964324, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6502366148953326, "success_rate.epoch.global": 0.8191632928475033, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.989321608040201, "tokens_p.mean_in_band": 0.8216145833333334, "tokens_rate.above_band": 0.9707317073170731, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02926829268292683 }, { "epoch": 1.1319813829787235, "grad_norm": 95.56899990127589, "learning_rate": 1.7643552861436969e-07, "loss": 0.29, "step": 6810, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.55, "success_rate.epoch.env.math": 0.9497206703910615, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.9301355578727841, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6501348374900754, "success_rate.epoch.global": 0.8190348525469169, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99528452685422, "tokens_p.mean_in_band": 0.7050189393939394, "tokens_rate.above_band": 0.9595092024539877, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04049079754601227 }, { "epoch": 1.1328125, "grad_norm": 96.3758866676662, "learning_rate": 1.7640931436663485e-07, "loss": 0.4175, "step": 6815, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.546448087431694, "success_rate.epoch.env.math": 0.95, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.9304257528556594, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6498637113103943, "success_rate.epoch.global": 0.8186666666666667, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9983157319737801, "tokens_p.mean_below_band": 3.213062882423401e-08, "tokens_p.mean_in_band": 0.4875616776315789, "tokens_rate.above_band": 0.9723796033994334, "tokens_rate.below_band": 0.000708215297450425, "tokens_rate.in_band": 0.026912181303116147 }, { "epoch": 1.1336436170212765, "grad_norm": 53.3378040922987, "learning_rate": 1.7638310649148736e-07, "loss": 0.182, "step": 6820, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5128205128205128, "success_rate.epoch.env.logic": 0.546448087431694, "success_rate.epoch.env.math": 0.9505494505494505, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.9307851239669421, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6511118326268712, "success_rate.epoch.global": 0.8196286472148541, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9907309322033898, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9957805907172996, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004219409282700422 }, { "epoch": 1.1344747340425532, "grad_norm": 50.54492177090908, "learning_rate": 1.7635690503543972e-07, "loss": 0.3292, "step": 6825, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5128205128205128, "success_rate.epoch.env.logic": 0.546448087431694, "success_rate.epoch.env.math": 0.9513513513513514, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.9301848049281314, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6511301582416976, "success_rate.epoch.global": 0.8200395517468688, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9855603448275863, "tokens_p.mean_in_band": 0.6984375, "tokens_rate.above_band": 0.9354838709677419, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06451612903225806 }, { "epoch": 1.1353058510638299, "grad_norm": 68.55294102550499, "learning_rate": 1.7633071004499315e-07, "loss": 0.3155, "step": 6830, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5128205128205128, "success_rate.epoch.env.logic": 0.5489130434782609, "success_rate.epoch.env.math": 0.9521276595744681, "success_rate.epoch.env.sat": 0.07936507936507936, "success_rate.epoch.env.science": 0.9306122448979591, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6514636768089259, "success_rate.epoch.global": 0.8212180746561886, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9915730337078652, "tokens_p.mean_in_band": 0.876953125, "tokens_rate.above_band": 0.978021978021978, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02197802197802198 }, { "epoch": 1.1361369680851063, "grad_norm": 42.58266757112919, "learning_rate": 1.763045215666373e-07, "loss": 0.3828, "step": 6835, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5128205128205128, "success_rate.epoch.env.logic": 0.5459459459459459, "success_rate.epoch.env.math": 0.9528795811518325, "success_rate.epoch.env.sat": 0.078125, "success_rate.epoch.env.science": 0.9309644670050762, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6511815828804792, "success_rate.epoch.global": 0.8210800260247235, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9936170212765958, "tokens_p.mean_in_band": 0.6022518382352942, "tokens_rate.above_band": 0.9325396825396826, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06746031746031746 }, { "epoch": 1.136968085106383, "grad_norm": 69.81921572926505, "learning_rate": 1.762783396468503e-07, "loss": 0.2686, "step": 6840, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5128205128205128, "success_rate.epoch.env.logic": 0.5508021390374331, "success_rate.epoch.env.math": 0.953125, "success_rate.epoch.env.sat": 0.078125, "success_rate.epoch.env.science": 0.9313131313131313, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6553180643946495, "success_rate.epoch.global": 0.8222365869424693, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946714031971581, "tokens_p.mean_in_band": 0.8251953125, "tokens_rate.above_band": 0.9791304347826087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020869565217391306 }, { "epoch": 1.1377992021276595, "grad_norm": 94.35806235990412, "learning_rate": 1.7625216433209866e-07, "loss": 0.2955, "step": 6845, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5478723404255319, "success_rate.epoch.env.math": 0.9533678756476683, "success_rate.epoch.env.sat": 0.07575757575757576, "success_rate.epoch.env.science": 0.9306532663316583, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6536330893938638, "success_rate.epoch.global": 0.8201669877970456, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.36, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9957966141001855, "tokens_p.mean_in_band": 0.6857638888888888, "tokens_rate.above_band": 0.9089376053962901, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09106239460370995 }, { "epoch": 1.1386303191489362, "grad_norm": 70.99910087080949, "learning_rate": 1.7622599566883721e-07, "loss": 0.3005, "step": 6850, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5478723404255319, "success_rate.epoch.env.math": 0.9540816326530612, "success_rate.epoch.env.sat": 0.07575757575757576, "success_rate.epoch.env.science": 0.9301397205588823, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6536512904150108, "success_rate.epoch.global": 0.8206764518187619, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9888059701492538, "tokens_p.mean_in_band": 0.69375, "tokens_rate.above_band": 0.8993288590604027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10067114093959731 }, { "epoch": 1.1394614361702127, "grad_norm": 108.21024004665557, "learning_rate": 1.761998337035088e-07, "loss": 0.3995, "step": 6855, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5478723404255319, "success_rate.epoch.env.math": 0.9545454545454546, "success_rate.epoch.env.sat": 0.07462686567164178, "success_rate.epoch.env.science": 0.9295634920634921, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6535382798069261, "success_rate.epoch.global": 0.8204314720812182, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9898026315789473, "tokens_p.mean_in_band": 0.5434027777777778, "tokens_rate.above_band": 0.9134615384615384, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08653846153846154 }, { "epoch": 1.1402925531914894, "grad_norm": 92.3439269604249, "learning_rate": 1.7617367848254458e-07, "loss": 0.3623, "step": 6860, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5478723404255319, "success_rate.epoch.env.math": 0.9547738693467337, "success_rate.epoch.env.sat": 0.07462686567164178, "success_rate.epoch.env.science": 0.9301868239921337, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.653615711327828, "success_rate.epoch.global": 0.8215636822194199, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9861778846153846, "tokens_p.mean_in_band": 0.8470052083333334, "tokens_rate.above_band": 0.9629629629629629, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037037037037037035 }, { "epoch": 1.141123670212766, "grad_norm": 73.52560284614265, "learning_rate": 1.7614753005236367e-07, "loss": 0.2576, "step": 6865, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5121951219512195, "success_rate.epoch.env.logic": 0.5473684210526316, "success_rate.epoch.env.math": 0.9552238805970149, "success_rate.epoch.env.sat": 0.07462686567164178, "success_rate.epoch.env.science": 0.9303921568627451, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6547381246641201, "success_rate.epoch.global": 0.821831869510665, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9905007102272727, "tokens_p.mean_in_band": 0.7652439024390244, "tokens_rate.above_band": 0.971704623878537, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028295376121463076 }, { "epoch": 1.1419547872340425, "grad_norm": 152.0024369408406, "learning_rate": 1.7612138845937307e-07, "loss": 0.311, "step": 6870, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5121951219512195, "success_rate.epoch.env.logic": 0.5416666666666666, "success_rate.epoch.env.math": 0.9554455445544554, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.9307992202729045, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6541771718527291, "success_rate.epoch.global": 0.821072319201995, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.998793242156074, "tokens_p.mean_in_band": 0.5978190104166666, "tokens_rate.above_band": 0.9628195197521301, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037180480247869865 }, { "epoch": 1.1427859042553192, "grad_norm": 61.435888614597644, "learning_rate": 1.7609525374996777e-07, "loss": 0.2541, "step": 6875, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5121951219512195, "success_rate.epoch.env.logic": 0.5440414507772021, "success_rate.epoch.env.math": 0.9556650246305419, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.9312015503875969, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6544495895164849, "success_rate.epoch.global": 0.8219602977667494, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9869909502262444, "tokens_p.mean_in_band": 0.7719350961538461, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 1.1436170212765957, "grad_norm": 59.507457528148194, "learning_rate": 1.7606912597053042e-07, "loss": 0.2177, "step": 6880, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5121951219512195, "success_rate.epoch.env.logic": 0.5412371134020618, "success_rate.epoch.env.math": 0.9556650246305419, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.9315992292870906, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6542308023823352, "success_rate.epoch.global": 0.8221124150710315, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9889838129496403, "tokens_p.mean_in_band": 0.7447150735294118, "tokens_rate.above_band": 0.9423728813559322, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0576271186440678 }, { "epoch": 1.1444481382978724, "grad_norm": 62.537202015013435, "learning_rate": 1.7604300516743147e-07, "loss": 0.3482, "step": 6885, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5238095238095238, "success_rate.epoch.env.logic": 0.5412371134020618, "success_rate.epoch.env.math": 0.9514563106796117, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.9319271332694151, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6549338561904897, "success_rate.epoch.global": 0.8224815724815725, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9900778931750742, "tokens_p.mean_below_band": 4.3655745685100555e-09, "tokens_p.mean_in_band": 0.827880859375, "tokens_rate.above_band": 0.975397973950796, "tokens_rate.below_band": 0.001447178002894356, "tokens_rate.in_band": 0.023154848046309694 }, { "epoch": 1.1452792553191489, "grad_norm": 123.77716928348676, "learning_rate": 1.7601689138702892e-07, "loss": 0.2899, "step": 6890, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5238095238095238, "success_rate.epoch.env.logic": 0.5459183673469388, "success_rate.epoch.env.math": 0.9519230769230769, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.9322519083969466, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6554313830373872, "success_rate.epoch.global": 0.8234575442883323, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.989406779661017, "tokens_p.mean_in_band": 0.7365056818181818, "tokens_rate.above_band": 0.9554655870445344, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044534412955465584 }, { "epoch": 1.1461103723404256, "grad_norm": 57.1500794088816, "learning_rate": 1.7599078467566835e-07, "loss": 0.5215, "step": 6895, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5238095238095238, "success_rate.epoch.env.logic": 0.5482233502538071, "success_rate.epoch.env.math": 0.9473684210526315, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.9324452901998097, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6550534619922191, "success_rate.epoch.global": 0.8223844282238443, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9996141975308642, "tokens_p.mean_below_band": 1.2514647096395493e-09, "tokens_p.mean_in_band": 0.6627394153225806, "tokens_rate.above_band": 0.9278350515463918, "tokens_rate.below_band": 0.001145475372279496, "tokens_rate.in_band": 0.07101947308132875 }, { "epoch": 1.146941489361702, "grad_norm": 101.67079559653851, "learning_rate": 1.7596468507968277e-07, "loss": 0.4918, "step": 6900, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5238095238095238, "success_rate.epoch.env.logic": 0.545, "success_rate.epoch.env.math": 0.9481132075471698, "success_rate.epoch.env.sat": 0.07142857142857142, "success_rate.epoch.env.science": 0.9326375711574952, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6548456181011661, "success_rate.epoch.global": 0.822141560798548, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6944444444444443, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973605577689243, "tokens_p.mean_below_band": 3.4051481634378433e-09, "tokens_p.mean_in_band": 0.5270743534482759, "tokens_rate.above_band": 0.9766536964980544, "tokens_rate.below_band": 0.0007782101167315176, "tokens_rate.in_band": 0.022568093385214007 }, { "epoch": 1.1477726063829787, "grad_norm": 178.91768525544094, "learning_rate": 1.7593859264539255e-07, "loss": 0.3124, "step": 6905, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4489795918367347, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5238095238095238, "success_rate.epoch.env.logic": 0.5495049504950495, "success_rate.epoch.env.math": 0.9485981308411215, "success_rate.epoch.env.sat": 0.07042253521126761, "success_rate.epoch.env.science": 0.9328922495274102, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6552309377322219, "success_rate.epoch.global": 0.822503008423586, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9923433048433048, "tokens_p.mean_in_band": 0.7178308823529411, "tokens_rate.above_band": 0.9116883116883117, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08831168831168831 }, { "epoch": 1.1486037234042552, "grad_norm": 24.327930505453125, "learning_rate": 1.7591250741910537e-07, "loss": 0.3752, "step": 6910, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5116279069767442, "success_rate.epoch.env.logic": 0.5517241379310345, "success_rate.epoch.env.math": 0.9488372093023256, "success_rate.epoch.env.sat": 0.06944444444444445, "success_rate.epoch.env.science": 0.9332079021636877, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6534704485592336, "success_rate.epoch.global": 0.8217703349282297, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9942561267980821, "tokens_p.mean_below_band": 3.4051481634378433e-09, "tokens_p.mean_in_band": 0.5106281725888325, "tokens_rate.above_band": 0.9047963364666185, "tokens_rate.below_band": 0.00024102193299590263, "tokens_rate.in_band": 0.09496264160038563 }, { "epoch": 1.149434840425532, "grad_norm": 79.71076470764915, "learning_rate": 1.7588642944711615e-07, "loss": 0.3466, "step": 6915, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5116279069767442, "success_rate.epoch.env.logic": 0.5517241379310345, "success_rate.epoch.env.math": 0.9444444444444444, "success_rate.epoch.env.sat": 0.06944444444444445, "success_rate.epoch.env.science": 0.9337068160597572, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6531164621081598, "success_rate.epoch.global": 0.8221296847114813, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9874031007751938, "tokens_p.mean_below_band": 2.9976945370435715e-09, "tokens_p.mean_in_band": 0.87109375, "tokens_rate.above_band": 0.9772727272727273, "tokens_rate.below_band": 0.007575757575757576, "tokens_rate.in_band": 0.015151515151515152 }, { "epoch": 1.1502659574468086, "grad_norm": 84.51212873306991, "learning_rate": 1.758603587757069e-07, "loss": 0.3921, "step": 6920, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5116279069767442, "success_rate.epoch.env.logic": 0.5485436893203883, "success_rate.epoch.env.math": 0.944954128440367, "success_rate.epoch.env.sat": 0.0684931506849315, "success_rate.epoch.env.science": 0.933953488372093, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6528096088298052, "success_rate.epoch.global": 0.8214074512123004, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9981060606060606, "tokens_p.mean_in_band": 0.6362847222222222, "tokens_rate.above_band": 0.9720930232558139, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027906976744186046 }, { "epoch": 1.151097074468085, "grad_norm": 76.92137550398597, "learning_rate": 1.7583429545114663e-07, "loss": 0.3761, "step": 6925, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5116279069767442, "success_rate.epoch.env.logic": 0.5485436893203883, "success_rate.epoch.env.math": 0.9459459459459459, "success_rate.epoch.env.sat": 0.0684931506849315, "success_rate.epoch.env.science": 0.9342592592592592, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6529275714109638, "success_rate.epoch.global": 0.8223529411764706, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.990625, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.9917355371900827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008264462809917356 }, { "epoch": 1.1519281914893618, "grad_norm": 100.83000719757341, "learning_rate": 1.758082395196914e-07, "loss": 0.3032, "step": 6930, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5116279069767442, "success_rate.epoch.env.logic": 0.5528846153846154, "success_rate.epoch.env.math": 0.9459459459459459, "success_rate.epoch.env.sat": 0.06756756756756757, "success_rate.epoch.env.science": 0.9337626494940202, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6531929107911115, "success_rate.epoch.global": 0.8222222222222222, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9943181818181818, "tokens_p.mean_in_band": 0.5801630434782609, "tokens_rate.above_band": 0.928125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.071875 }, { "epoch": 1.1527593085106382, "grad_norm": 47.26800670644467, "learning_rate": 1.7578219102758406e-07, "loss": 0.1893, "step": 6935, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5116279069767442, "success_rate.epoch.env.logic": 0.5476190476190477, "success_rate.epoch.env.math": 0.9459459459459459, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9340659340659341, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6526598940552429, "success_rate.epoch.global": 0.8213038416763678, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977334104938271, "tokens_p.mean_in_band": 0.5945172991071429, "tokens_rate.above_band": 0.9585798816568047, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04142011834319527 }, { "epoch": 1.153590425531915, "grad_norm": 123.17156960894862, "learning_rate": 1.7575615002105437e-07, "loss": 0.5311, "step": 6940, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5116279069767442, "success_rate.epoch.env.logic": 0.5450236966824644, "success_rate.epoch.env.math": 0.9461883408071748, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9335154826958105, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6523959479238358, "success_rate.epoch.global": 0.8209733487833141, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9989905196629213, "tokens_p.mean_in_band": 0.63648681640625, "tokens_rate.above_band": 0.9468085106382979, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05319148936170213 }, { "epoch": 1.1544215425531914, "grad_norm": 35.35487462564525, "learning_rate": 1.757301165463187e-07, "loss": 0.2618, "step": 6945, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4423076923076923, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5116279069767442, "success_rate.epoch.env.logic": 0.5492957746478874, "success_rate.epoch.env.math": 0.9461883408071748, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.9337568058076225, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6529363025058786, "success_rate.epoch.global": 0.8207492795389049, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982972756410257, "tokens_p.mean_in_band": 0.6156684027777778, "tokens_rate.above_band": 0.9719626168224299, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028037383177570093 }, { "epoch": 1.155252659574468, "grad_norm": 157.5594518718164, "learning_rate": 1.7570409064958004e-07, "loss": 0.2543, "step": 6950, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4423076923076923, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5492957746478874, "success_rate.epoch.env.math": 0.9464285714285714, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.9341155234657039, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6519336698970363, "success_rate.epoch.global": 0.8209982788296041, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9911369554924242, "tokens_p.mean_in_band": 0.5535351138828634, "tokens_rate.above_band": 0.8208317139525846, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.17916828604741547 }, { "epoch": 1.1560837765957448, "grad_norm": 72.8843058128863, "learning_rate": 1.7567807237702804e-07, "loss": 0.3149, "step": 6955, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4423076923076923, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5492957746478874, "success_rate.epoch.env.math": 0.9469026548672567, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.9336917562724014, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6519382441011619, "success_rate.epoch.global": 0.8214489446662864, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9887019230769231, "tokens_p.mean_below_band": 8.754432201385498e-08, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9629629629629629, "tokens_rate.below_band": 0.007407407407407408, "tokens_rate.in_band": 0.02962962962962963 }, { "epoch": 1.1569148936170213, "grad_norm": 56.63815850444694, "learning_rate": 1.7565206177483877e-07, "loss": 0.2096, "step": 6960, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4423076923076923, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5514018691588785, "success_rate.epoch.env.math": 0.9475982532751092, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.9339875111507583, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6522198302645438, "success_rate.epoch.global": 0.822360953461975, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9891877637130801, "tokens_p.mean_in_band": 0.8441051136363636, "tokens_rate.above_band": 0.9556451612903226, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04435483870967742 }, { "epoch": 1.157746010638298, "grad_norm": 84.65821011976759, "learning_rate": 1.7562605888917462e-07, "loss": 0.3921, "step": 6965, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5488372093023256, "success_rate.epoch.env.math": 0.9475982532751092, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.9342806394316163, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6502445510969479, "success_rate.epoch.global": 0.8214689265536723, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.2, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9986020108450068, "tokens_p.mean_below_band": 1.6880221664905548e-09, "tokens_p.mean_in_band": 0.3689205545774648, "tokens_rate.above_band": 0.9391046042860174, "tokens_rate.below_band": 0.0006365372374283895, "tokens_rate.in_band": 0.06025885847655421 }, { "epoch": 1.1585771276595744, "grad_norm": 33.6778048202698, "learning_rate": 1.7560006376618442e-07, "loss": 0.2421, "step": 6970, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5462962962962963, "success_rate.epoch.env.math": 0.9478260869565217, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.9347442680776014, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6500764192170723, "success_rate.epoch.global": 0.8219101123595506, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999420401854714, "tokens_p.mean_in_band": 0.5535714285714286, "tokens_rate.above_band": 0.9788199697428139, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02118003025718608 }, { "epoch": 1.1594082446808511, "grad_norm": 248.1887875816634, "learning_rate": 1.7557407645200306e-07, "loss": 0.3623, "step": 6975, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5458715596330275, "success_rate.epoch.env.math": 0.9484978540772532, "success_rate.epoch.env.sat": 0.06578947368421052, "success_rate.epoch.env.science": 0.9348591549295775, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6501093207906576, "success_rate.epoch.global": 0.8220481253497481, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9999571917808219, "tokens_p.mean_in_band": 0.6028645833333334, "tokens_rate.above_band": 0.9878213802435724, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012178619756427604 }, { "epoch": 1.1602393617021276, "grad_norm": 106.84248333089963, "learning_rate": 1.7554809699275171e-07, "loss": 0.4171, "step": 6980, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5458715596330275, "success_rate.epoch.env.math": 0.9487179487179487, "success_rate.epoch.env.sat": 0.06493506493506493, "success_rate.epoch.env.science": 0.9351446099912357, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6510877073419594, "success_rate.epoch.global": 0.8222841225626741, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9938563327032136, "tokens_p.mean_in_band": 0.729248046875, "tokens_rate.above_band": 0.9429590017825312, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0570409982174688 }, { "epoch": 1.1610704787234043, "grad_norm": 42.32926787228792, "learning_rate": 1.7552212543453752e-07, "loss": 0.5429, "step": 6985, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5458715596330275, "success_rate.epoch.env.math": 0.9491525423728814, "success_rate.epoch.env.sat": 0.06493506493506493, "success_rate.epoch.env.science": 0.9346689895470384, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6510839776338444, "success_rate.epoch.global": 0.8226164079822617, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9949723974763407, "tokens_p.mean_below_band": 2.8405338525772095e-08, "tokens_p.mean_in_band": 0.5094572368421053, "tokens_rate.above_band": 0.9694189602446484, "tokens_rate.below_band": 0.0015290519877675841, "tokens_rate.in_band": 0.0290519877675841 }, { "epoch": 1.1619015957446808, "grad_norm": 95.57475233336709, "learning_rate": 1.7549616182345367e-07, "loss": 0.3074, "step": 6990, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.547945205479452, "success_rate.epoch.env.math": 0.9497907949790795, "success_rate.epoch.env.sat": 0.06493506493506493, "success_rate.epoch.env.science": 0.9349522983521249, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.651356269202727, "success_rate.epoch.global": 0.8234969663541092, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9937042124542125, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9927272727272727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007272727272727273 }, { "epoch": 1.1627327127659575, "grad_norm": 44.23898971664997, "learning_rate": 1.7547020620557919e-07, "loss": 0.2919, "step": 6995, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.55, "success_rate.epoch.env.math": 0.95, "success_rate.epoch.env.sat": 0.0641025641025641, "success_rate.epoch.env.science": 0.9351771823681936, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6515068494504577, "success_rate.epoch.global": 0.8236263736263736, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955357142857143, "tokens_p.mean_below_band": 1.418811734765768e-09, "tokens_p.mean_in_band": 0.6554276315789473, "tokens_rate.above_band": 0.9180327868852459, "tokens_rate.below_band": 0.004098360655737705, "tokens_rate.in_band": 0.0778688524590164 }, { "epoch": 1.163563829787234, "grad_norm": 79.46399248260443, "learning_rate": 1.7544425862697895e-07, "loss": 0.3625, "step": 7000, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.545045045045045, "success_rate.epoch.env.math": 0.9504132231404959, "success_rate.epoch.env.sat": 0.06329113924050633, "success_rate.epoch.env.science": 0.9354005167958657, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6510405019732899, "success_rate.epoch.global": 0.8228540185893931, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9970846972176759, "tokens_p.mean_in_band": 0.5802859042553191, "tokens_rate.above_band": 0.9629629629629629, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037037037037037035 }, { "epoch": 1.1643949468085106, "grad_norm": 53.458921458680706, "learning_rate": 1.7541831913370352e-07, "loss": 0.3968, "step": 7005, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.545045045045045, "success_rate.epoch.env.math": 0.9504132231404959, "success_rate.epoch.env.sat": 0.06329113924050633, "success_rate.epoch.env.science": 0.9359521776259607, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6510906529578441, "success_rate.epoch.global": 0.8238172920065253, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9827205882352941, "tokens_p.mean_in_band": 0.80390625, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 1.1652260638297873, "grad_norm": 82.14597967312726, "learning_rate": 1.753923877717892e-07, "loss": 0.2984, "step": 7010, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.545045045045045, "success_rate.epoch.env.math": 0.9508196721311475, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.9361702127659575, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6510755024933115, "success_rate.epoch.global": 0.823943661971831, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9885714285714285, "tokens_p.mean_in_band": 0.609765625, "tokens_rate.above_band": 0.8974358974358975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10256410256410256 }, { "epoch": 1.1660571808510638, "grad_norm": 72.68839523037914, "learning_rate": 1.7536646458725774e-07, "loss": 0.4131, "step": 7015, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5466666666666666, "success_rate.epoch.env.math": 0.9508196721311475, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.9364944961896697, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6512524029519782, "success_rate.epoch.global": 0.8242587601078167, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9946911196911197, "tokens_p.mean_in_band": 0.7075520833333333, "tokens_rate.above_band": 0.9810606060606061, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01893939393939394 }, { "epoch": 1.1668882978723405, "grad_norm": 24.258893229879263, "learning_rate": 1.753405496261165e-07, "loss": 0.4125, "step": 7020, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4339622641509434, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4791666666666667, "success_rate.epoch.env.logic": 0.5466666666666666, "success_rate.epoch.env.math": 0.9510204081632653, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.9359730412805392, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6493293072964922, "success_rate.epoch.global": 0.8234978540772532, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9940845792138896, "tokens_p.mean_below_band": 3.241002559661865e-07, "tokens_p.mean_in_band": 0.5438189338235294, "tokens_rate.above_band": 0.8901051727838591, "tokens_rate.below_band": 0.00042927666881305, "tokens_rate.in_band": 0.10946555054732775 }, { "epoch": 1.167719414893617, "grad_norm": 108.22967084602361, "learning_rate": 1.7531464293435813e-07, "loss": 0.3902, "step": 7025, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.5482456140350878, "success_rate.epoch.env.math": 0.951417004048583, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.9361344537815126, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6495875095222728, "success_rate.epoch.global": 0.8233724653148345, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971825485382727, "tokens_p.mean_below_band": 3.9301812648773193e-07, "tokens_p.mean_in_band": 0.5773082386363636, "tokens_rate.above_band": 0.9529987239472565, "tokens_rate.below_band": 0.0002126754572522331, "tokens_rate.in_band": 0.04678860059549128 }, { "epoch": 1.1685505319148937, "grad_norm": 171.43690665886163, "learning_rate": 1.752887445579607e-07, "loss": 0.3781, "step": 7030, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.5478260869565217, "success_rate.epoch.env.math": 0.9516129032258065, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.9363484087102177, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.649586630161124, "success_rate.epoch.global": 0.823498139287613, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976572958500669, "tokens_p.mean_in_band": 0.609619140625, "tokens_rate.above_band": 0.9790301441677588, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020969855832241154 }, { "epoch": 1.1693816489361701, "grad_norm": 83.67632789272761, "learning_rate": 1.752628545428875e-07, "loss": 0.2859, "step": 7035, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46, "success_rate.epoch.env.logic": 0.5478260869565217, "success_rate.epoch.env.math": 0.952, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.9358333333333333, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6487215634606938, "success_rate.epoch.global": 0.8232804232804233, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9886773875943551, "tokens_p.mean_in_band": 0.5463895281933256, "tokens_rate.above_band": 0.7780898876404494, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.22191011235955055 }, { "epoch": 1.1702127659574468, "grad_norm": 56.60002714041991, "learning_rate": 1.752369729350869e-07, "loss": 0.2696, "step": 7040, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46, "success_rate.epoch.env.logic": 0.5497835497835498, "success_rate.epoch.env.math": 0.952191235059761, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.9360995850622407, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6498593779803944, "success_rate.epoch.global": 0.8240252897787145, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947493224932249, "tokens_p.mean_in_band": 0.49609375, "tokens_rate.above_band": 0.9972972972972973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002702702702702703 }, { "epoch": 1.1710438829787235, "grad_norm": 164.5444256938949, "learning_rate": 1.7521109978049248e-07, "loss": 0.3965, "step": 7045, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46, "success_rate.epoch.env.logic": 0.5536480686695279, "success_rate.epoch.env.math": 0.952191235059761, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.9363110008271298, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6501597715906304, "success_rate.epoch.global": 0.8241469816272966, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925271739130435, "tokens_p.mean_in_band": 0.7728125, "tokens_rate.above_band": 0.9169435215946844, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08305647840531562 }, { "epoch": 1.171875, "grad_norm": 85.49191193379976, "learning_rate": 1.751852351250227e-07, "loss": 0.3958, "step": 7050, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46, "success_rate.epoch.env.logic": 0.5512820512820513, "success_rate.epoch.env.math": 0.952191235059761, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.9366776315789473, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6499780091692068, "success_rate.epoch.global": 0.8243596445373759, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972826086956522, "tokens_p.mean_in_band": 0.704483695652174, "tokens_rate.above_band": 0.9666666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03333333333333333 }, { "epoch": 1.1727061170212765, "grad_norm": 87.56327753569607, "learning_rate": 1.7515937901458102e-07, "loss": 0.3397, "step": 7055, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46, "success_rate.epoch.env.logic": 0.5531914893617021, "success_rate.epoch.env.math": 0.9523809523809523, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.936988543371522, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6501971061867902, "success_rate.epoch.global": 0.8250910983862572, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916452442159382, "tokens_p.mean_in_band": 0.4782151442307692, "tokens_rate.above_band": 0.681260945709282, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.318739054290718 }, { "epoch": 1.1735372340425532, "grad_norm": 61.69552485540957, "learning_rate": 1.751335314950557e-07, "loss": 0.2666, "step": 7060, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.5508474576271186, "success_rate.epoch.env.math": 0.952755905511811, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.9372964169381107, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6510086543919703, "success_rate.epoch.global": 0.8254790264111859, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942141089108911, "tokens_p.mean_in_band": 0.783125, "tokens_rate.above_band": 0.9758454106280193, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024154589371980676 }, { "epoch": 1.1743683510638299, "grad_norm": 56.7892630894906, "learning_rate": 1.7510769261231972e-07, "loss": 0.258, "step": 7065, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47058823529411764, "success_rate.epoch.env.logic": 0.5527426160337553, "success_rate.epoch.env.math": 0.9529411764705882, "success_rate.epoch.env.sat": 0.06172839506172839, "success_rate.epoch.env.science": 0.9376518218623482, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6512300938728477, "success_rate.epoch.global": 0.8262886597938144, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.990983606557377, "tokens_p.mean_in_band": 0.83359375, "tokens_rate.above_band": 0.9838709677419355, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016129032258064516 }, { "epoch": 1.1751994680851063, "grad_norm": 72.62855285085922, "learning_rate": 1.7508186241223083e-07, "loss": 0.3656, "step": 7070, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.5504201680672269, "success_rate.epoch.env.math": 0.953307392996109, "success_rate.epoch.env.sat": 0.06097560975609756, "success_rate.epoch.env.science": 0.9378531073446328, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6501794115983014, "success_rate.epoch.global": 0.8255515649050795, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9942074970484062, "tokens_p.mean_in_band": 0.6224574183558559, "tokens_rate.above_band": 0.884133611691023, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11586638830897704 }, { "epoch": 1.176030585106383, "grad_norm": 74.54143651156309, "learning_rate": 1.7505604094063124e-07, "loss": 0.2546, "step": 7075, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.5523012552301255, "success_rate.epoch.env.math": 0.9534883720930233, "success_rate.epoch.env.sat": 0.06097560975609756, "success_rate.epoch.env.science": 0.9381028938906752, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6503895800351973, "success_rate.epoch.global": 0.8261758691206544, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9891581632653061, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.98989898989899, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010101010101010102 }, { "epoch": 1.1768617021276595, "grad_norm": 152.26976873196566, "learning_rate": 1.7503022824334778e-07, "loss": 0.4121, "step": 7080, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.5541666666666667, "success_rate.epoch.env.math": 0.9538461538461539, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.9376, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6504791848895901, "success_rate.epoch.global": 0.8260427263479145, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951171875, "tokens_p.mean_below_band": 1.6555645743210334e-12, "tokens_p.mean_in_band": 0.74560546875, "tokens_rate.above_band": 0.9186602870813397, "tokens_rate.below_band": 0.004784688995215311, "tokens_rate.in_band": 0.07655502392344497 }, { "epoch": 1.1776928191489362, "grad_norm": 38.62991362871605, "learning_rate": 1.7500442436619164e-07, "loss": 0.2349, "step": 7085, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.5532786885245902, "success_rate.epoch.env.math": 0.9538461538461539, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.9370517928286852, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6503486225883727, "success_rate.epoch.global": 0.8253164556962025, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.43333333333333335, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9986267605633803, "tokens_p.mean_in_band": 0.6198814655172413, "tokens_rate.above_band": 0.9683578832515003, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031642116748499725 }, { "epoch": 1.1785239361702127, "grad_norm": 107.29877957976451, "learning_rate": 1.749786293549584e-07, "loss": 0.2544, "step": 7090, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46153846153846156, "success_rate.epoch.env.logic": 0.5528455284552846, "success_rate.epoch.env.math": 0.9541984732824428, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.9373015873015873, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6503639820283623, "success_rate.epoch.global": 0.8256048387096774, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998670212765958, "tokens_p.mean_in_band": 0.5771484375, "tokens_rate.above_band": 0.9832635983263598, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016736401673640166 }, { "epoch": 1.1793550531914894, "grad_norm": 39.65841618223465, "learning_rate": 1.7495284325542785e-07, "loss": 0.2593, "step": 7095, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4642857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.5528455284552846, "success_rate.epoch.env.math": 0.9543726235741445, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.9375987361769352, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6505006443963708, "success_rate.epoch.global": 0.8258906171600602, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953606007944389, "tokens_p.mean_below_band": 1.4466543992360434e-07, "tokens_p.mean_in_band": 0.5897534320809249, "tokens_rate.above_band": 0.9581351094196003, "tokens_rate.below_band": 0.0007136060894386299, "tokens_rate.in_band": 0.04115128449096099 }, { "epoch": 1.180186170212766, "grad_norm": 49.91994618426111, "learning_rate": 1.7492706611336405e-07, "loss": 0.3344, "step": 7100, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4642857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.5528455284552846, "success_rate.epoch.env.math": 0.9545454545454546, "success_rate.epoch.env.sat": 0.060240963855421686, "success_rate.epoch.env.science": 0.9378930817610063, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6505431149923144, "success_rate.epoch.global": 0.8265, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954187657430731, "tokens_p.mean_in_band": 0.5658413951120163, "tokens_rate.above_band": 0.8899349921542256, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11006500784577448 }, { "epoch": 1.1810172872340425, "grad_norm": 288.99483276656537, "learning_rate": 1.7490129797451506e-07, "loss": 0.3402, "step": 7105, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4642857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.5542168674698795, "success_rate.epoch.env.math": 0.9545454545454546, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9380877742946708, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6506202856483733, "success_rate.epoch.global": 0.826195219123506, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977879665379665, "tokens_p.mean_below_band": 4.94765117764473e-09, "tokens_p.mean_in_band": 0.7008023648648649, "tokens_rate.above_band": 0.9533742331288344, "tokens_rate.below_band": 0.001226993865030675, "tokens_rate.in_band": 0.04539877300613497 }, { "epoch": 1.1818484042553192, "grad_norm": 55.133019757459614, "learning_rate": 1.7487553888461307e-07, "loss": 0.3324, "step": 7110, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4642857142857143, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.5537848605577689, "success_rate.epoch.env.math": 0.9547169811320755, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9383775351014041, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6506229475103046, "success_rate.epoch.global": 0.8264749628160635, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 1.0003165701881331, "tokens_p.mean_in_band": 0.5994791666666667, "tokens_rate.above_band": 0.9787535410764873, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021246458923512748 }, { "epoch": 1.1826795212765957, "grad_norm": 169.01561704980205, "learning_rate": 1.7484978888937412e-07, "loss": 0.4208, "step": 7115, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.47368421052631576, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.5533596837944664, "success_rate.epoch.env.math": 0.9547169811320755, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9386169386169386, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6514604677823803, "success_rate.epoch.global": 0.8266666666666667, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969466600199401, "tokens_p.mean_in_band": 0.6034307065217391, "tokens_rate.above_band": 0.9561487130600572, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0438512869399428 }, { "epoch": 1.1835106382978724, "grad_norm": 203.43035540605297, "learning_rate": 1.7482404803449814e-07, "loss": 0.2213, "step": 7120, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.46551724137931033, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.5511811023622047, "success_rate.epoch.env.math": 0.9547169811320755, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9382239382239382, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6504842358758105, "success_rate.epoch.global": 0.826044226044226, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973795369211514, "tokens_p.mean_below_band": 1.2278178473934531e-11, "tokens_p.mean_in_band": 0.6227463942307693, "tokens_rate.above_band": 0.982779827798278, "tokens_rate.below_band": 0.0012300123001230013, "tokens_rate.in_band": 0.015990159901599015 }, { "epoch": 1.1843417553191489, "grad_norm": 90.52725854615451, "learning_rate": 1.7479831636566884e-07, "loss": 0.2953, "step": 7125, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.5511811023622047, "success_rate.epoch.env.math": 0.9553903345724907, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9384141647421094, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6498454592597509, "success_rate.epoch.global": 0.826320939334638, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973421391752577, "tokens_p.mean_in_band": 0.5625, "tokens_rate.above_band": 0.9627791563275434, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03722084367245657 }, { "epoch": 1.1851728723404256, "grad_norm": 84.21501005622059, "learning_rate": 1.7477259392855363e-07, "loss": 0.4298, "step": 7130, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.546875, "success_rate.epoch.env.math": 0.9553903345724907, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9386503067484663, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6494754628637648, "success_rate.epoch.global": 0.825938566552901, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.997867803837953, "tokens_p.mean_in_band": 0.5989040798611112, "tokens_rate.above_band": 0.975051975051975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02494802494802495 }, { "epoch": 1.186003989361702, "grad_norm": 36.77602723989091, "learning_rate": 1.7474688076880356e-07, "loss": 0.2917, "step": 7135, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46296296296296297, "success_rate.epoch.env.logic": 0.5447470817120622, "success_rate.epoch.env.math": 0.9553903345724907, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9389312977099237, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.650228721678059, "success_rate.epoch.global": 0.8261291889266634, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951693702290076, "tokens_p.mean_in_band": 0.680921052631579, "tokens_rate.above_band": 0.9821930646672915, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01780693533270853 }, { "epoch": 1.1868351063829787, "grad_norm": 49.01055505193855, "learning_rate": 1.7472117693205316e-07, "loss": 0.2247, "step": 7140, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46296296296296297, "success_rate.epoch.env.logic": 0.5447470817120622, "success_rate.epoch.env.math": 0.9553903345724907, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9385898407884761, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6501976801397457, "success_rate.epoch.global": 0.8264023210831721, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9895833333333334, "tokens_p.mean_in_band": 0.6135110294117647, "tokens_rate.above_band": 0.8903225806451613, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10967741935483871 }, { "epoch": 1.1876662234042552, "grad_norm": 133.21010695596706, "learning_rate": 1.7469548246392044e-07, "loss": 0.2524, "step": 7145, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46296296296296297, "success_rate.epoch.env.logic": 0.5444015444015444, "success_rate.epoch.env.math": 0.9553903345724907, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9389140271493213, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6501957391443208, "success_rate.epoch.global": 0.8266730861819933, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9994478798586572, "tokens_p.mean_below_band": 3.510081114654895e-12, "tokens_p.mean_in_band": 0.6101190476190477, "tokens_rate.above_band": 0.9846903270702854, "tokens_rate.below_band": 0.0006958942240779402, "tokens_rate.in_band": 0.014613778705636743 }, { "epoch": 1.188497340425532, "grad_norm": 155.33858523778767, "learning_rate": 1.7466979741000682e-07, "loss": 0.4628, "step": 7150, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46296296296296297, "success_rate.epoch.env.logic": 0.549618320610687, "success_rate.epoch.env.math": 0.9553903345724907, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9391891891891891, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6506950062578672, "success_rate.epoch.global": 0.8274209012464045, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9932208994708994, "tokens_p.mean_in_band": 0.8370535714285714, "tokens_rate.above_band": 0.9818181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01818181818181818 }, { "epoch": 1.1893284574468086, "grad_norm": 63.59817003667413, "learning_rate": 1.7464412181589697e-07, "loss": 0.4984, "step": 7155, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.549618320610687, "success_rate.epoch.env.math": 0.9555555555555556, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9387602688573562, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6515586981138256, "success_rate.epoch.global": 0.8276849642004773, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952242524916943, "tokens_p.mean_in_band": 0.6396484375, "tokens_rate.above_band": 0.9868852459016394, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013114754098360656 }, { "epoch": 1.190159574468085, "grad_norm": 168.22055137422205, "learning_rate": 1.7461845572715875e-07, "loss": 0.3207, "step": 7160, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5547169811320755, "success_rate.epoch.env.math": 0.9555555555555556, "success_rate.epoch.env.sat": 0.05952380952380952, "success_rate.epoch.env.science": 0.9390787518573551, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.652051165706679, "success_rate.epoch.global": 0.8285035629453682, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9951584507042254, "tokens_p.mean_in_band": 0.8229166666666666, "tokens_rate.above_band": 0.9895470383275261, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010452961672473868 }, { "epoch": 1.1909906914893618, "grad_norm": 59.1696786261031, "learning_rate": 1.7459279918934317e-07, "loss": 0.4115, "step": 7165, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.556390977443609, "success_rate.epoch.env.math": 0.9560439560439561, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9392142327650111, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6521964018517286, "success_rate.epoch.global": 0.828679602460956, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9997452445652174, "tokens_p.mean_in_band": 0.6395089285714286, "tokens_rate.above_band": 0.9633507853403142, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03664921465968586 }, { "epoch": 1.1918218085106382, "grad_norm": 108.83992066461903, "learning_rate": 1.7456715224798435e-07, "loss": 0.3053, "step": 7170, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4576271186440678, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.556390977443609, "success_rate.epoch.env.math": 0.9562043795620438, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9396170839469808, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6522476086426429, "success_rate.epoch.global": 0.8294865756005653, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9919642857142857, "tokens_p.mean_in_band": 0.840625, "tokens_rate.above_band": 0.9655172413793104, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034482758620689655 }, { "epoch": 1.192652925531915, "grad_norm": 36.42211282594382, "learning_rate": 1.745415149485993e-07, "loss": 0.2745, "step": 7175, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4426229508196721, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.556390977443609, "success_rate.epoch.env.math": 0.9563636363636364, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9398826979472141, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6509222180042274, "success_rate.epoch.global": 0.8292682926829268, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998784046692607, "tokens_p.mean_in_band": 0.700390625, "tokens_rate.above_band": 0.9903660886319846, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009633911368015413 }, { "epoch": 1.1934840425531914, "grad_norm": 67.20591533191127, "learning_rate": 1.7451588733668793e-07, "loss": 0.3741, "step": 7180, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4426229508196721, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5543071161048689, "success_rate.epoch.env.math": 0.9565217391304348, "success_rate.epoch.env.sat": 0.058823529411764705, "success_rate.epoch.env.science": 0.9393718042366691, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.650700704160365, "success_rate.epoch.global": 0.8288920056100981, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994257656458056, "tokens_p.mean_in_band": 0.5396075581395349, "tokens_rate.above_band": 0.9458438287153652, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05415617128463476 }, { "epoch": 1.194315159574468, "grad_norm": 130.2437482582585, "learning_rate": 1.7449026945773302e-07, "loss": 0.2298, "step": 7185, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5522388059701493, "success_rate.epoch.env.math": 0.9568345323741008, "success_rate.epoch.env.sat": 0.05747126436781609, "success_rate.epoch.env.science": 0.9395484340859432, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6512515041894928, "success_rate.epoch.global": 0.8282922289436947, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9954782958199357, "tokens_p.mean_in_band": 0.6380013992537313, "tokens_rate.above_band": 0.933, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.067 }, { "epoch": 1.1951462765957448, "grad_norm": 87.47231132390999, "learning_rate": 1.7446466135719997e-07, "loss": 0.3221, "step": 7190, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5522388059701493, "success_rate.epoch.env.math": 0.9572953736654805, "success_rate.epoch.env.sat": 0.05747126436781609, "success_rate.epoch.env.science": 0.9390862944162437, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6512513861551001, "success_rate.epoch.global": 0.8285449490268767, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9285714285714286, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9855263157894737, "tokens_p.mean_in_band": 0.5767045454545454, "tokens_rate.above_band": 0.8962264150943396, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10377358490566038 }, { "epoch": 1.1959773936170213, "grad_norm": 47.18650746120833, "learning_rate": 1.7443906308053696e-07, "loss": 0.2442, "step": 7195, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5539033457249071, "success_rate.epoch.env.math": 0.9575971731448764, "success_rate.epoch.env.sat": 0.05747126436781609, "success_rate.epoch.env.science": 0.9393063583815029, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6514501500823194, "success_rate.epoch.global": 0.8291782086795937, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9919444444444444, "tokens_p.mean_in_band": 0.8658854166666666, "tokens_rate.above_band": 0.9868421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013157894736842105 }, { "epoch": 1.196808510638298, "grad_norm": 54.65375532986128, "learning_rate": 1.7441347467317455e-07, "loss": 0.3442, "step": 7200, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5571955719557196, "success_rate.epoch.env.math": 0.9578947368421052, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.93943763518385, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6517290577351154, "success_rate.epoch.global": 0.8293468261269549, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9917834394904459, "tokens_p.mean_in_band": 0.5362582076848249, "tokens_rate.above_band": 0.8842081549898626, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11579184501013742 }, { "epoch": 1.1976396276595744, "grad_norm": 78.06349294447963, "learning_rate": 1.74387896180526e-07, "loss": 0.3545, "step": 7205, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5571955719557196, "success_rate.epoch.env.math": 0.9581881533101045, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.93974175035868, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6525747021655228, "success_rate.epoch.global": 0.8301282051282052, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9970868644067796, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.1984707446808511, "grad_norm": 157.83051673803342, "learning_rate": 1.7436232764798683e-07, "loss": 0.2677, "step": 7210, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5567765567765568, "success_rate.epoch.env.math": 0.9581881533101045, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.9400855920114123, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6525678682085746, "success_rate.epoch.global": 0.8304466727438469, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988954344624448, "tokens_p.mean_in_band": 0.5855334051724138, "tokens_rate.above_band": 0.9590395480225988, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04096045197740113 }, { "epoch": 1.1993018617021276, "grad_norm": 77.89964694815394, "learning_rate": 1.743367691209349e-07, "loss": 0.4176, "step": 7215, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5547445255474452, "success_rate.epoch.env.math": 0.9584775086505191, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.9402985074626866, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6524287990778999, "success_rate.epoch.global": 0.8306085376930064, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9954438250428816, "tokens_p.mean_in_band": 0.5691817434210527, "tokens_rate.above_band": 0.96843853820598, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03156146179401993 }, { "epoch": 1.2001329787234043, "grad_norm": 107.20550330360727, "learning_rate": 1.7431122064473036e-07, "loss": 0.2692, "step": 7220, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.8, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4727272727272727, "success_rate.epoch.env.logic": 0.5547445255474452, "success_rate.epoch.env.math": 0.9587628865979382, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.9405940594059405, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6524816108861429, "success_rate.epoch.global": 0.8312980551786522, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.990166083916084, "tokens_p.mean_in_band": 0.7708333333333334, "tokens_rate.above_band": 0.9794520547945206, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02054794520547945 }, { "epoch": 1.2009640957446808, "grad_norm": 861.2179674595136, "learning_rate": 1.742856822647155e-07, "loss": 0.3342, "step": 7225, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48214285714285715, "success_rate.epoch.env.logic": 0.5547445255474452, "success_rate.epoch.env.math": 0.9587628865979382, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.9408450704225352, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6412391801678562, "success_rate.epoch.global": 0.8314556106354214, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916618329466357, "tokens_p.mean_in_band": 0.6695913461538462, "tokens_rate.above_band": 0.9298813376483279, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07011866235167206 }, { "epoch": 1.2017952127659575, "grad_norm": 43.7755808881625, "learning_rate": 1.7426015402621467e-07, "loss": 0.3397, "step": 7230, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48214285714285715, "success_rate.epoch.env.logic": 0.5563636363636364, "success_rate.epoch.env.math": 0.9590443686006825, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.9410526315789474, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6414308305292514, "success_rate.epoch.global": 0.8320610687022901, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.987300796812749, "tokens_p.mean_in_band": 0.84228515625, "tokens_rate.above_band": 0.9691119691119691, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03088803088803089 }, { "epoch": 1.202626329787234, "grad_norm": 44.13057072630015, "learning_rate": 1.7423463597453422e-07, "loss": 0.4113, "step": 7235, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48214285714285715, "success_rate.epoch.env.logic": 0.5563636363636364, "success_rate.epoch.env.math": 0.9593220338983051, "success_rate.epoch.env.sat": 0.056818181818181816, "success_rate.epoch.env.science": 0.9412997903563941, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6414785418088031, "success_rate.epoch.global": 0.8326621923937361, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9867869127516778, "tokens_p.mean_in_band": 0.79150390625, "tokens_rate.above_band": 0.9490445859872612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050955414012738856 }, { "epoch": 1.2034574468085106, "grad_norm": 86.4276008076503, "learning_rate": 1.7420912815496246e-07, "loss": 0.3673, "step": 7240, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.453125, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48214285714285715, "success_rate.epoch.env.logic": 0.5563636363636364, "success_rate.epoch.env.math": 0.9593220338983051, "success_rate.epoch.env.sat": 0.056179775280898875, "success_rate.epoch.env.science": 0.941544885177453, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6407889261693772, "success_rate.epoch.global": 0.8323673651359786, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958333333333333, "tokens_p.mean_in_band": 0.661110101744186, "tokens_rate.above_band": 0.8847184986595175, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11528150134048257 }, { "epoch": 1.2042885638297873, "grad_norm": 42.279391788396666, "learning_rate": 1.7418363061276958e-07, "loss": 0.3394, "step": 7245, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48214285714285715, "success_rate.epoch.env.logic": 0.5579710144927537, "success_rate.epoch.env.math": 0.9594594594594594, "success_rate.epoch.env.sat": 0.05555555555555555, "success_rate.epoch.env.science": 0.9416666666666667, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6410310007147951, "success_rate.epoch.global": 0.8320746334962239, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.996510152284264, "tokens_p.mean_in_band": 0.70751953125, "tokens_rate.above_band": 0.9656862745098039, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03431372549019608 }, { "epoch": 1.2051196808510638, "grad_norm": 78.52708401960292, "learning_rate": 1.741581433932074e-07, "loss": 0.3322, "step": 7250, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48214285714285715, "success_rate.epoch.env.logic": 0.5539568345323741, "success_rate.epoch.env.math": 0.959866220735786, "success_rate.epoch.env.sat": 0.05555555555555555, "success_rate.epoch.env.science": 0.9418282548476454, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6407177433963338, "success_rate.epoch.global": 0.831858407079646, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0002920560747663, "tokens_p.mean_in_band": 0.5736736918604651, "tokens_rate.above_band": 0.9647540983606557, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03524590163934426 }, { "epoch": 1.2059507978723405, "grad_norm": 131.0447295822214, "learning_rate": 1.7413266654150945e-07, "loss": 0.2896, "step": 7255, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49122807017543857, "success_rate.epoch.env.logic": 0.5539568345323741, "success_rate.epoch.env.math": 0.96, "success_rate.epoch.env.sat": 0.054945054945054944, "success_rate.epoch.env.science": 0.9419889502762431, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6415149422249603, "success_rate.epoch.global": 0.8319364799294221, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9943181818181818, "tokens_p.mean_below_band": 1.3869794202037156e-11, "tokens_p.mean_in_band": 0.61572265625, "tokens_rate.above_band": 0.9556737588652482, "tokens_rate.below_band": 0.0017730496453900709, "tokens_rate.in_band": 0.0425531914893617 }, { "epoch": 1.206781914893617, "grad_norm": 87.68702110098192, "learning_rate": 1.7410720010289102e-07, "loss": 0.2921, "step": 7260, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49122807017543857, "success_rate.epoch.env.logic": 0.5539568345323741, "success_rate.epoch.env.math": 0.9601328903654485, "success_rate.epoch.env.sat": 0.054945054945054944, "success_rate.epoch.env.science": 0.9416208791208791, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6414935621531499, "success_rate.epoch.global": 0.8321616871704746, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.985608552631579, "tokens_p.mean_below_band": 9.918585419654846e-08, "tokens_p.mean_in_band": 0.6337890625, "tokens_rate.above_band": 0.9440993788819876, "tokens_rate.below_band": 0.006211180124223602, "tokens_rate.in_band": 0.049689440993788817 }, { "epoch": 1.2076130319148937, "grad_norm": 175.50684701828033, "learning_rate": 1.740817441225487e-07, "loss": 0.2506, "step": 7265, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49122807017543857, "success_rate.epoch.env.logic": 0.5539568345323741, "success_rate.epoch.env.math": 0.9605263157894737, "success_rate.epoch.env.sat": 0.054945054945054944, "success_rate.epoch.env.science": 0.9411764705882353, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6414889273250937, "success_rate.epoch.global": 0.8323851203501094, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948051948051948, "tokens_p.mean_below_band": 1.6079866327345371e-09, "tokens_p.mean_in_band": 0.736328125, "tokens_rate.above_band": 0.9871794871794872, "tokens_rate.below_band": 0.002564102564102564, "tokens_rate.in_band": 0.010256410256410256 }, { "epoch": 1.2084441489361701, "grad_norm": 28.995324676765556, "learning_rate": 1.7405629864566066e-07, "loss": 0.311, "step": 7270, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49122807017543857, "success_rate.epoch.env.logic": 0.5555555555555556, "success_rate.epoch.env.math": 0.9611650485436893, "success_rate.epoch.env.sat": 0.054945054945054944, "success_rate.epoch.env.science": 0.941296928327645, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6424433840646875, "success_rate.epoch.global": 0.8331154684095861, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9935154525386314, "tokens_p.mean_in_band": 0.7317708333333334, "tokens_rate.above_band": 0.993421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006578947368421052 }, { "epoch": 1.2092752659574468, "grad_norm": 72.1276390026, "learning_rate": 1.740308637173864e-07, "loss": 0.2485, "step": 7275, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49122807017543857, "success_rate.epoch.env.logic": 0.5567375886524822, "success_rate.epoch.env.math": 0.9614147909967846, "success_rate.epoch.env.sat": 0.05434782608695652, "success_rate.epoch.env.science": 0.9414567733151804, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6425337833082746, "success_rate.epoch.global": 0.8329718004338394, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969602609727165, "tokens_p.mean_in_band": 0.6417410714285714, "tokens_rate.above_band": 0.9601366742596811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03986332574031891 }, { "epoch": 1.2101063829787235, "grad_norm": 90.22251660236212, "learning_rate": 1.7400543938286668e-07, "loss": 0.4147, "step": 7280, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49122807017543857, "success_rate.epoch.env.logic": 0.5567375886524822, "success_rate.epoch.env.math": 0.9616613418530351, "success_rate.epoch.env.sat": 0.053763440860215055, "success_rate.epoch.env.science": 0.9416553595658074, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.642521124388287, "success_rate.epoch.global": 0.8331171638564635, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9881410256410257, "tokens_p.mean_in_band": 0.6795176630434783, "tokens_rate.above_band": 0.8944954128440367, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10550458715596331 }, { "epoch": 1.2109375, "grad_norm": 57.9272961121597, "learning_rate": 1.7398002568722344e-07, "loss": 0.3213, "step": 7285, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5567375886524822, "success_rate.epoch.env.math": 0.9619047619047619, "success_rate.epoch.env.sat": 0.053763440860215055, "success_rate.epoch.env.science": 0.9418918918918919, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6433622045885028, "success_rate.epoch.global": 0.8337639965546942, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9958832335329342, "tokens_p.mean_in_band": 0.828125, "tokens_rate.above_band": 0.9940476190476191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005952380952380952 }, { "epoch": 1.2117686170212765, "grad_norm": 85.92478016633245, "learning_rate": 1.7395462267555976e-07, "loss": 0.1761, "step": 7290, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5567375886524822, "success_rate.epoch.env.math": 0.9620253164556962, "success_rate.epoch.env.sat": 0.05319148936170213, "success_rate.epoch.env.science": 0.9421654337592468, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.643346035944846, "success_rate.epoch.global": 0.833976833976834, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9924107142857143, "tokens_p.mean_in_band": 0.7425986842105263, "tokens_rate.above_band": 0.9170305676855895, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08296943231441048 }, { "epoch": 1.2125997340425532, "grad_norm": 193.41476546345996, "learning_rate": 1.7392923039295982e-07, "loss": 0.4619, "step": 7295, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5567375886524822, "success_rate.epoch.env.math": 0.9622641509433962, "success_rate.epoch.env.sat": 0.05319148936170213, "success_rate.epoch.env.science": 0.942397856664434, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6433888775260176, "success_rate.epoch.global": 0.8345446772124839, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9955628803245437, "tokens_p.mean_in_band": 0.8763020833333334, "tokens_rate.above_band": 0.9939516129032258, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006048387096774193 }, { "epoch": 1.2134308510638299, "grad_norm": 31.936847658755845, "learning_rate": 1.739038488844886e-07, "loss": 0.3176, "step": 7300, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5578947368421052, "success_rate.epoch.env.math": 0.9625, "success_rate.epoch.env.sat": 0.05319148936170213, "success_rate.epoch.env.science": 0.9425517702070808, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6435295057795515, "success_rate.epoch.global": 0.8347529812606473, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8222222222222223, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965625, "tokens_p.mean_in_band": 0.6397429435483871, "tokens_rate.above_band": 0.9626955475330926, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03730445246690734 }, { "epoch": 1.2142619680851063, "grad_norm": 77.57606503227909, "learning_rate": 1.7387847819519216e-07, "loss": 0.2923, "step": 7305, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4915254237288136, "success_rate.epoch.env.logic": 0.5578947368421052, "success_rate.epoch.env.math": 0.9629629629629629, "success_rate.epoch.env.sat": 0.05319148936170213, "success_rate.epoch.env.science": 0.9427048634243838, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6428150948621951, "success_rate.epoch.global": 0.8349596945269411, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9904157667386609, "tokens_p.mean_in_band": 0.7289635894495413, "tokens_rate.above_band": 0.8946859903381642, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10531400966183575 }, { "epoch": 1.215093085106383, "grad_norm": 81.84160403636776, "learning_rate": 1.7385311837009725e-07, "loss": 0.2625, "step": 7310, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5578947368421052, "success_rate.epoch.env.math": 0.963302752293578, "success_rate.epoch.env.sat": 0.05263157894736842, "success_rate.epoch.env.science": 0.9427811044577512, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6435724308822711, "success_rate.epoch.global": 0.8350253807106599, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.997244837172359, "tokens_p.mean_in_band": 0.6151815878378378, "tokens_rate.above_band": 0.9714506172839507, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02854938271604938 }, { "epoch": 1.2159242021276595, "grad_norm": 23.928531410931644, "learning_rate": 1.7382776945421128e-07, "loss": 0.3848, "step": 7315, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5578947368421052, "success_rate.epoch.env.math": 0.963302752293578, "success_rate.epoch.env.sat": 0.05263157894736842, "success_rate.epoch.env.science": 0.9417989417989417, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6434831433678339, "success_rate.epoch.global": 0.8348082595870207, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99235807860262, "tokens_p.mean_below_band": 4.1443854570388794e-08, "tokens_p.mean_in_band": 0.6575520833333334, "tokens_rate.above_band": 0.9703389830508474, "tokens_rate.below_band": 0.00423728813559322, "tokens_rate.in_band": 0.025423728813559324 }, { "epoch": 1.2167553191489362, "grad_norm": 85.70987555682437, "learning_rate": 1.7380243149252238e-07, "loss": 0.3901, "step": 7320, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5559440559440559, "success_rate.epoch.env.math": 0.9636363636363636, "success_rate.epoch.env.sat": 0.052083333333333336, "success_rate.epoch.env.science": 0.9419141914191419, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6432967737724612, "success_rate.epoch.global": 0.8345233095338094, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.995127688172043, "tokens_p.mean_below_band": 1.1874362826347351e-08, "tokens_p.mean_in_band": 0.6556396484375, "tokens_rate.above_band": 0.916256157635468, "tokens_rate.below_band": 0.0049261083743842365, "tokens_rate.in_band": 0.07881773399014778 }, { "epoch": 1.2175864361702127, "grad_norm": 92.9236694079965, "learning_rate": 1.7377710452999928e-07, "loss": 0.2393, "step": 7325, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5559440559440559, "success_rate.epoch.env.math": 0.9637462235649547, "success_rate.epoch.env.sat": 0.052083333333333336, "success_rate.epoch.env.science": 0.9415627051871307, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6432748077448776, "success_rate.epoch.global": 0.8347280334728033, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9842557251908397, "tokens_p.mean_in_band": 0.7760416666666666, "tokens_rate.above_band": 0.9562043795620438, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043795620437956206 }, { "epoch": 1.2184175531914894, "grad_norm": 59.83939440072019, "learning_rate": 1.7375178861159107e-07, "loss": 0.3882, "step": 7330, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5590277777777778, "success_rate.epoch.env.math": 0.9637462235649547, "success_rate.epoch.env.sat": 0.052083333333333336, "success_rate.epoch.env.science": 0.9417920209287116, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.643575992978996, "success_rate.epoch.global": 0.835279399499583, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9936995967741935, "tokens_p.mean_in_band": 0.84765625, "tokens_rate.above_band": 0.9959839357429718, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004016064257028112 }, { "epoch": 1.219248670212766, "grad_norm": 243.97516346494928, "learning_rate": 1.7372648378222736e-07, "loss": 0.5224, "step": 7335, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5605536332179931, "success_rate.epoch.env.math": 0.9637462235649547, "success_rate.epoch.env.sat": 0.052083333333333336, "success_rate.epoch.env.science": 0.9414443721535458, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6436831026758187, "success_rate.epoch.global": 0.8354798504362276, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9871651785714286, "tokens_p.mean_in_band": 0.6995738636363636, "tokens_rate.above_band": 0.9531914893617022, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04680851063829787 }, { "epoch": 1.2200797872340425, "grad_norm": 225.05026430831384, "learning_rate": 1.73701190086818e-07, "loss": 0.2789, "step": 7340, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5605536332179931, "success_rate.epoch.env.math": 0.9610778443113772, "success_rate.epoch.env.sat": 0.05154639175257732, "success_rate.epoch.env.science": 0.9416342412451362, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6434089706992057, "success_rate.epoch.global": 0.8352649006622517, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.990625, "tokens_p.mean_below_band": 6.344635039567947e-09, "tokens_p.mean_in_band": 0.7288240131578947, "tokens_rate.above_band": 0.8823529411764706, "tokens_rate.below_band": 0.0058823529411764705, "tokens_rate.in_band": 0.11176470588235295 }, { "epoch": 1.2209109042553192, "grad_norm": 60.98321741520074, "learning_rate": 1.7367590757025308e-07, "loss": 0.398, "step": 7345, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5567010309278351, "success_rate.epoch.env.math": 0.9610778443113772, "success_rate.epoch.env.sat": 0.05154639175257732, "success_rate.epoch.env.science": 0.9418604651162791, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6430792999338407, "success_rate.epoch.global": 0.834983498349835, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9981831395348837, "tokens_p.mean_in_band": 0.6392983490566038, "tokens_rate.above_band": 0.9578361177406524, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042163882259347654 }, { "epoch": 1.2217420212765957, "grad_norm": 59.839232518173475, "learning_rate": 1.736506362774029e-07, "loss": 0.3146, "step": 7350, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.5582191780821918, "success_rate.epoch.env.math": 0.9610778443113772, "success_rate.epoch.env.sat": 0.05154639175257732, "success_rate.epoch.env.science": 0.941972920696325, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6439726930289205, "success_rate.epoch.global": 0.8353231782626596, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974731522425774, "tokens_p.mean_in_band": 0.7495561079545454, "tokens_rate.above_band": 0.9862928348909658, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013707165109034268 }, { "epoch": 1.2225731382978724, "grad_norm": 88.7350089943168, "learning_rate": 1.7362537625311778e-07, "loss": 0.2237, "step": 7355, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.5612244897959183, "success_rate.epoch.env.math": 0.9610778443113772, "success_rate.epoch.env.sat": 0.05154639175257732, "success_rate.epoch.env.science": 0.9421593830334191, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6442628543062678, "success_rate.epoch.global": 0.8357963875205254, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946808510638298, "tokens_p.mean_in_band": 0.7734375, "tokens_rate.above_band": 0.9915611814345991, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008438818565400843 }, { "epoch": 1.2234042553191489, "grad_norm": 95.5187307601354, "learning_rate": 1.736001275422281e-07, "loss": 0.2678, "step": 7360, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.5612244897959183, "success_rate.epoch.env.math": 0.9613095238095238, "success_rate.epoch.env.sat": 0.05102040816326531, "success_rate.epoch.env.science": 0.942381562099872, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6442562974858395, "success_rate.epoch.global": 0.8359918200408998, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953218562874252, "tokens_p.mean_in_band": 0.7462890625, "tokens_rate.above_band": 0.893048128342246, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10695187165775401 }, { "epoch": 1.2242353723404256, "grad_norm": 54.862413313771825, "learning_rate": 1.7357489018954415e-07, "loss": 0.4193, "step": 7365, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.5612244897959183, "success_rate.epoch.env.math": 0.9615384615384616, "success_rate.epoch.env.sat": 0.050505050505050504, "success_rate.epoch.env.science": 0.9426020408163265, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6442503028301283, "success_rate.epoch.global": 0.8361858190709046, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.991640127388535, "tokens_p.mean_in_band": 0.6, "tokens_rate.above_band": 0.9127906976744186, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0872093023255814 }, { "epoch": 1.225066489361702, "grad_norm": 56.44767446130195, "learning_rate": 1.7354966423985605e-07, "loss": 0.3954, "step": 7370, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.559322033898305, "success_rate.epoch.env.math": 0.961764705882353, "success_rate.epoch.env.sat": 0.050505050505050504, "success_rate.epoch.env.science": 0.9427480916030534, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6434926323764921, "success_rate.epoch.global": 0.8359057676685622, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9964012925969448, "tokens_p.mean_in_band": 0.6123046875, "tokens_rate.above_band": 0.9747995418098511, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025200458190148912 }, { "epoch": 1.2258976063829787, "grad_norm": 39.12242616722017, "learning_rate": 1.7352444973793362e-07, "loss": 0.2677, "step": 7375, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.5604026845637584, "success_rate.epoch.env.math": 0.961764705882353, "success_rate.epoch.env.sat": 0.050505050505050504, "success_rate.epoch.env.science": 0.9429657794676806, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.643610663151954, "success_rate.epoch.global": 0.8360987454471873, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9993003731343284, "tokens_p.mean_in_band": 0.6050646551724138, "tokens_rate.above_band": 0.9651860744297719, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03481392557022809 }, { "epoch": 1.2267287234042552, "grad_norm": 109.84265599011383, "learning_rate": 1.734992467285265e-07, "loss": 0.432, "step": 7380, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.5618729096989966, "success_rate.epoch.env.math": 0.961764705882353, "success_rate.epoch.env.sat": 0.05, "success_rate.epoch.env.science": 0.943217665615142, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6403122141317403, "success_rate.epoch.global": 0.8359532446594116, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9902676399026764, "tokens_p.mean_in_band": 0.3748739919354839, "tokens_rate.above_band": 0.6236722306525038, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.37632776934749623 }, { "epoch": 1.227559840425532, "grad_norm": 1897.7888927282638, "learning_rate": 1.7347405525636384e-07, "loss": 0.4824, "step": 7385, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081967213114754, "success_rate.epoch.env.logic": 0.5618729096989966, "success_rate.epoch.env.math": 0.9618768328445748, "success_rate.epoch.env.sat": 0.05, "success_rate.epoch.env.science": 0.943502824858757, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6403483310595436, "success_rate.epoch.global": 0.8365461847389558, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.984375, "tokens_p.mean_in_band": 0.76953125, "tokens_rate.above_band": 0.937799043062201, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06220095693779904 }, { "epoch": 1.2283909574468086, "grad_norm": 76.34445281624471, "learning_rate": 1.7344887536615427e-07, "loss": 0.3469, "step": 7390, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5161290322580645, "success_rate.epoch.env.logic": 0.56, "success_rate.epoch.env.math": 0.9619883040935673, "success_rate.epoch.env.sat": 0.04950495049504951, "success_rate.epoch.env.science": 0.943609022556391, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6408739693040226, "success_rate.epoch.global": 0.8362034441329596, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7777777777777778, "tokens_p.mean_above_band": 0.997297503045067, "tokens_p.mean_in_band": 0.5390625, "tokens_rate.above_band": 0.9624853458382181, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03751465416178194 }, { "epoch": 1.229222074468085, "grad_norm": 62.07374099914586, "learning_rate": 1.7342370710258601e-07, "loss": 0.3277, "step": 7395, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.6666666666666666, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5079365079365079, "success_rate.epoch.env.logic": 0.56, "success_rate.epoch.env.math": 0.9619883040935673, "success_rate.epoch.env.sat": 0.04950495049504951, "success_rate.epoch.env.science": 0.9438902743142145, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6401547627073196, "success_rate.epoch.global": 0.8363926576217079, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9921965948777648, "tokens_p.mean_in_band": 0.5486670020120724, "tokens_rate.above_band": 0.8736333587592169, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12636664124078312 }, { "epoch": 1.2300531914893618, "grad_norm": 351.29815142955675, "learning_rate": 1.733985505103265e-07, "loss": 0.3053, "step": 7400, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5079365079365079, "success_rate.epoch.env.logic": 0.56, "success_rate.epoch.env.math": 0.9622093023255814, "success_rate.epoch.env.sat": 0.04950495049504951, "success_rate.epoch.env.science": 0.9440298507462687, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6445165465512392, "success_rate.epoch.global": 0.8368483883804219, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.990234375, "tokens_p.mean_in_band": 0.820556640625, "tokens_rate.above_band": 0.9655172413793104, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034482758620689655 }, { "epoch": 1.2308843085106382, "grad_norm": 47.58568719033422, "learning_rate": 1.7337340563402254e-07, "loss": 0.2302, "step": 7405, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5079365079365079, "success_rate.epoch.env.logic": 0.56, "success_rate.epoch.env.math": 0.9623188405797102, "success_rate.epoch.env.sat": 0.04854368932038835, "success_rate.epoch.env.science": 0.9442033477991321, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6444548896541784, "success_rate.epoch.global": 0.836572788575962, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99300799086758, "tokens_p.mean_in_band": 0.6189516129032258, "tokens_rate.above_band": 0.876, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.124 }, { "epoch": 1.231715425531915, "grad_norm": 71.9789341886777, "learning_rate": 1.7334827251830018e-07, "loss": 0.3609, "step": 7410, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5079365079365079, "success_rate.epoch.env.logic": 0.5614617940199336, "success_rate.epoch.env.math": 0.962536023054755, "success_rate.epoch.env.sat": 0.05714285714285714, "success_rate.epoch.env.science": 0.9443413729128015, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6454018141479163, "success_rate.epoch.global": 0.8367588932806325, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9929748062015504, "tokens_p.mean_in_band": 0.6744791666666666, "tokens_rate.above_band": 0.9247311827956989, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07526881720430108 }, { "epoch": 1.2325465425531914, "grad_norm": 291.68875604555234, "learning_rate": 1.733231512077645e-07, "loss": 0.2239, "step": 7415, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.515625, "success_rate.epoch.env.logic": 0.5614617940199336, "success_rate.epoch.env.math": 0.962536023054755, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.9445129469790382, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6460192664517355, "success_rate.epoch.global": 0.8364854215918046, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9944661458333334, "tokens_p.mean_in_band": 0.6645833333333333, "tokens_rate.above_band": 0.927536231884058, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07246376811594203 }, { "epoch": 1.233377659574468, "grad_norm": 209.55077672755175, "learning_rate": 1.732980417469996e-07, "loss": 0.2544, "step": 7420, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.515625, "success_rate.epoch.env.logic": 0.5629139072847682, "success_rate.epoch.env.math": 0.9626436781609196, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.9447513812154696, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6461827394160474, "success_rate.epoch.global": 0.8370632116215155, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966299019607843, "tokens_p.mean_in_band": 0.603271484375, "tokens_rate.above_band": 0.9272727272727272, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07272727272727272 }, { "epoch": 1.2342087765957448, "grad_norm": 85.7948375372207, "learning_rate": 1.7327294418056876e-07, "loss": 0.4763, "step": 7425, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.5657894736842105, "success_rate.epoch.env.math": 0.9627507163323782, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.944920440636475, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6457481004782486, "success_rate.epoch.global": 0.837245696400626, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953947368421052, "tokens_p.mean_below_band": 9.049472282640636e-11, "tokens_p.mean_in_band": 0.8459821428571429, "tokens_rate.above_band": 0.979381443298969, "tokens_rate.below_band": 0.002577319587628866, "tokens_rate.in_band": 0.01804123711340206 }, { "epoch": 1.2350398936170213, "grad_norm": 43.8859169989041, "learning_rate": 1.7324785855301395e-07, "loss": 0.2535, "step": 7430, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.5657894736842105, "success_rate.epoch.env.math": 0.9628571428571429, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.9451553930530164, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6457791349274583, "success_rate.epoch.global": 0.8377535101404057, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9878318584070797, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9912280701754386, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008771929824561403 }, { "epoch": 1.235871010638298, "grad_norm": 90.09312044481688, "learning_rate": 1.7322278490885598e-07, "loss": 0.3369, "step": 7435, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.5657894736842105, "success_rate.epoch.env.math": 0.9630681818181818, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.9447144592952612, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.645758235400484, "success_rate.epoch.global": 0.8378063010501751, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.5714285714285715, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9956896551724138, "tokens_p.mean_below_band": 4.377298878921465e-08, "tokens_p.mean_in_band": 0.5210129310344828, "tokens_rate.above_band": 0.9555873925501432, "tokens_rate.below_band": 0.0028653295128939827, "tokens_rate.in_band": 0.04154727793696275 }, { "epoch": 1.2367021276595744, "grad_norm": 50.87681601760614, "learning_rate": 1.7319772329259447e-07, "loss": 0.4331, "step": 7440, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.5672131147540984, "success_rate.epoch.env.math": 0.9630681818181818, "success_rate.epoch.env.sat": 0.056074766355140186, "success_rate.epoch.env.science": 0.9443436176648518, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6458539444404366, "success_rate.epoch.global": 0.8379216750678558, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953908554572272, "tokens_p.mean_in_band": 0.5479561941964286, "tokens_rate.above_band": 0.9603399433427762, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039660056657223795 }, { "epoch": 1.2375332446808511, "grad_norm": 25.949806146757872, "learning_rate": 1.731726737487076e-07, "loss": 0.1965, "step": 7445, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4492753623188406, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.5686274509803921, "success_rate.epoch.env.math": 0.9630681818181818, "success_rate.epoch.env.sat": 0.05555555555555555, "success_rate.epoch.env.science": 0.9445114595898673, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6453499423249265, "success_rate.epoch.global": 0.8376497873985311, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9948586118251928, "tokens_p.mean_in_band": 0.7029296875, "tokens_rate.above_band": 0.9511002444987775, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0488997555012225 }, { "epoch": 1.2383643617021276, "grad_norm": 70.4545634397591, "learning_rate": 1.7314763632165225e-07, "loss": 0.3787, "step": 7450, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4492753623188406, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.564935064935065, "success_rate.epoch.env.math": 0.9630681818181818, "success_rate.epoch.env.sat": 0.05555555555555555, "success_rate.epoch.env.science": 0.9446782922429344, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6450294374710848, "success_rate.epoch.global": 0.8373168851195065, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.999290293040293, "tokens_p.mean_in_band": 0.631907393292683, "tokens_rate.above_band": 0.9708392603129445, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029160739687055477 }, { "epoch": 1.2391954787234043, "grad_norm": 25.7788781614811, "learning_rate": 1.7312261105586365e-07, "loss": 0.2402, "step": 7455, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4492753623188406, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.5631067961165048, "success_rate.epoch.env.math": 0.9630681818181818, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.9443447037701975, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6447865700813376, "success_rate.epoch.global": 0.8367895545314901, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9933127572016461, "tokens_p.mean_in_band": 0.6124237804878049, "tokens_rate.above_band": 0.9467532467532468, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053246753246753244 }, { "epoch": 1.2400265957446808, "grad_norm": 590.1470970112662, "learning_rate": 1.7309759799575557e-07, "loss": 0.3905, "step": 7460, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4492753623188406, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5076923076923077, "success_rate.epoch.env.logic": 0.5612903225806452, "success_rate.epoch.env.math": 0.9630681818181818, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.9445107398568019, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6446365303132234, "success_rate.epoch.global": 0.8367816091954023, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997577519379845, "tokens_p.mean_in_band": 0.6837993421052632, "tokens_rate.above_band": 0.9713855421686747, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0286144578313253 }, { "epoch": 1.2408577127659575, "grad_norm": 94.82940739419936, "learning_rate": 1.7307259718572e-07, "loss": 0.2434, "step": 7465, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44285714285714284, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.5641025641025641, "success_rate.epoch.env.math": 0.9630681818181818, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.9440809042236764, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.644947747939251, "success_rate.epoch.global": 0.8365788468881252, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9911085690515807, "tokens_p.mean_below_band": 1.755543053150177e-07, "tokens_p.mean_in_band": 0.7861328125, "tokens_rate.above_band": 0.9901153212520593, "tokens_rate.below_band": 0.0032948929159802307, "tokens_rate.in_band": 0.006589785831960461 }, { "epoch": 1.241688829787234, "grad_norm": 43.24476711455403, "learning_rate": 1.7304760867012732e-07, "loss": 0.3674, "step": 7470, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44285714285714284, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.5623003194888179, "success_rate.epoch.env.math": 0.9631728045325779, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.944279786603438, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6448114988920156, "success_rate.epoch.global": 0.8366958507803578, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9990208877284595, "tokens_p.mean_in_band": 0.6417534722222222, "tokens_rate.above_band": 0.9445129469790382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055487053020961775 }, { "epoch": 1.2425199468085106, "grad_norm": 229.6832796178881, "learning_rate": 1.7302263249332598e-07, "loss": 0.2719, "step": 7475, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44285714285714284, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.5623003194888179, "success_rate.epoch.env.math": 0.9631728045325779, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.9444772593030124, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6448294509556134, "success_rate.epoch.global": 0.8370679832890239, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9898852657004831, "tokens_p.mean_below_band": 9.049472282640636e-11, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.971830985915493, "tokens_rate.below_band": 0.004694835680751174, "tokens_rate.in_band": 0.023474178403755867 }, { "epoch": 1.2433510638297873, "grad_norm": 248.54523507004134, "learning_rate": 1.7299766869964256e-07, "loss": 0.2548, "step": 7480, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.43661971830985913, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5151515151515151, "success_rate.epoch.env.logic": 0.5636942675159236, "success_rate.epoch.env.math": 0.963276836158192, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.9446733372572101, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6444164175973982, "success_rate.epoch.global": 0.8372445117335352, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9937177835051546, "tokens_p.mean_in_band": 0.59677734375, "tokens_rate.above_band": 0.9509803921568627, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049019607843137254 }, { "epoch": 1.2441821808510638, "grad_norm": 31.37462547237889, "learning_rate": 1.7297271733338174e-07, "loss": 0.4799, "step": 7485, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43661971830985913, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5632911392405063, "success_rate.epoch.env.math": 0.9633802816901409, "success_rate.epoch.env.sat": 0.05504587155963303, "success_rate.epoch.env.science": 0.9442160892542572, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6406405999042264, "success_rate.epoch.global": 0.8362881931346662, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45999999999999996, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.981434124386252, "tokens_p.mean_below_band": 2.0117568055866286e-07, "tokens_p.mean_in_band": 0.06185242674710122, "tokens_rate.above_band": 0.16036745406824146, "tokens_rate.below_band": 0.002099737532808399, "tokens_rate.in_band": 0.8375328083989502 }, { "epoch": 1.2450132978723405, "grad_norm": 124.82186128878911, "learning_rate": 1.7294777843882605e-07, "loss": 0.2841, "step": 7490, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43661971830985913, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5646687697160884, "success_rate.epoch.env.math": 0.9635854341736695, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.9443793911007026, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6407538424307153, "success_rate.epoch.global": 0.8364661654135338, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9924242424242424, "tokens_p.mean_in_band": 0.6579861111111112, "tokens_rate.above_band": 0.9428571428571428, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05714285714285714 }, { "epoch": 1.245844414893617, "grad_norm": 316.8585395516994, "learning_rate": 1.7292285206023589e-07, "loss": 0.3493, "step": 7495, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43661971830985913, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5642633228840125, "success_rate.epoch.env.math": 0.9635854341736695, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.9445740956826137, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6407346840443366, "success_rate.epoch.global": 0.8365817091454273, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0000903179190752, "tokens_p.mean_in_band": 0.4714988425925926, "tokens_rate.above_band": 0.9624478442280946, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037552155771905425 }, { "epoch": 1.2466755319148937, "grad_norm": 0.9498932657786866, "learning_rate": 1.728979382418495e-07, "loss": 0.2038, "step": 7500, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43661971830985913, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.565625, "success_rate.epoch.env.math": 0.9637883008356546, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.9447674418604651, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6408944922221388, "success_rate.epoch.global": 0.8371311169219275, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991258741258742, "tokens_p.mean_in_band": 0.5348557692307693, "tokens_rate.above_band": 0.9821428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017857142857142856 }, { "epoch": 1.2475066489361701, "grad_norm": 235.16793673376912, "learning_rate": 1.7287303702788283e-07, "loss": 0.2561, "step": 7505, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.565625, "success_rate.epoch.env.math": 0.96398891966759, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.9449275362318841, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6403759968084162, "success_rate.epoch.global": 0.837243947858473, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9990390528905289, "tokens_p.mean_in_band": 0.5276988636363636, "tokens_rate.above_band": 0.9866504854368932, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013349514563106795 }, { "epoch": 1.2483377659574468, "grad_norm": 884.4672010310525, "learning_rate": 1.728481484625294e-07, "loss": 0.4626, "step": 7510, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.565625, "success_rate.epoch.env.math": 0.96398891966759, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.9451817657241778, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6403991085804429, "success_rate.epoch.global": 0.837727441515039, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988086510263929, "tokens_p.mean_in_band": 0.694078947368421, "tokens_rate.above_band": 0.9728958630527818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02710413694721826 }, { "epoch": 1.2491688829787235, "grad_norm": 82.08050344934207, "learning_rate": 1.728232725899603e-07, "loss": 0.3161, "step": 7515, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.565625, "success_rate.epoch.env.math": 0.9640883977900553, "success_rate.epoch.env.sat": 0.05454545454545454, "success_rate.epoch.env.science": 0.9448909299655568, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6403817124317015, "success_rate.epoch.global": 0.8379578246392897, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9823529411764705, "tokens_p.mean_in_band": 0.7098214285714286, "tokens_rate.above_band": 0.9239130434782609, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07608695652173914 }, { "epoch": 1.25, "grad_norm": 176.83054269338285, "learning_rate": 1.7279840945432416e-07, "loss": 0.3441, "step": 7520, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.565625, "success_rate.epoch.env.math": 0.9643835616438357, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.9445080091533181, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6403290617544416, "success_rate.epoch.global": 0.8378179137486178, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9856601731601732, "tokens_p.mean_in_band": 0.6815185546875, "tokens_rate.above_band": 0.8783269961977186, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12167300380228137 }, { "epoch": 1.2508311170212765, "grad_norm": 86.48136312755608, "learning_rate": 1.7277355909974692e-07, "loss": 0.2084, "step": 7525, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5652173913043478, "success_rate.epoch.env.math": 0.9644808743169399, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.944634703196347, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6403123706653944, "success_rate.epoch.global": 0.8378676470588236, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951463560334528, "tokens_p.mean_in_band": 0.7140625, "tokens_rate.above_band": 0.9766627771295215, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023337222870478413 }, { "epoch": 1.2516622340425532, "grad_norm": 54.12244428987018, "learning_rate": 1.7274872157033187e-07, "loss": 0.4196, "step": 7530, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4305555555555556, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5652173913043478, "success_rate.epoch.env.math": 0.9644808743169399, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.9443181818181818, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6402835959946521, "success_rate.epoch.global": 0.8379765395894428, "success_rate.window.env.science": 0.9, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9833984375, "tokens_p.mean_below_band": 2.2851054382044822e-11, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.927536231884058, "tokens_rate.below_band": 0.007246376811594203, "tokens_rate.in_band": 0.06521739130434782 }, { "epoch": 1.2524933510638299, "grad_norm": 101.42877078453, "learning_rate": 1.7272389691015956e-07, "loss": 0.2857, "step": 7535, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4246575342465753, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5652173913043478, "success_rate.epoch.env.math": 0.9644808743169399, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.9445073612684032, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6397646103711285, "success_rate.epoch.global": 0.8380255941499086, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9935850439882697, "tokens_p.mean_in_band": 0.7666903409090909, "tokens_rate.above_band": 0.9393939393939394, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06060606060606061 }, { "epoch": 1.2533244680851063, "grad_norm": 88.0597909106569, "learning_rate": 1.7269908516328767e-07, "loss": 0.241, "step": 7540, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4246575342465753, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5652173913043478, "success_rate.epoch.env.math": 0.9646739130434783, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.9446952595936795, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6397992410122026, "success_rate.epoch.global": 0.8384979948960991, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9861111111111112, "tokens_p.mean_in_band": 0.7739955357142857, "tokens_rate.above_band": 0.9507042253521126, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04929577464788732 }, { "epoch": 1.254155585106383, "grad_norm": 82.27353593075074, "learning_rate": 1.72674286373751e-07, "loss": 0.2817, "step": 7545, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4246575342465753, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5652173913043478, "success_rate.epoch.env.math": 0.9646739130434783, "success_rate.epoch.env.sat": 0.05405405405405406, "success_rate.epoch.env.science": 0.9443194600674916, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6397650774189129, "success_rate.epoch.global": 0.8384867224445253, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.988030095759234, "tokens_p.mean_in_band": 0.6106627747252747, "tokens_rate.above_band": 0.80065717415115, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.19934282584884994 }, { "epoch": 1.2549867021276595, "grad_norm": 144.41834092786416, "learning_rate": 1.7264950058556127e-07, "loss": 0.3731, "step": 7550, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4246575342465753, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.56656346749226, "success_rate.epoch.env.math": 0.9648648648648649, "success_rate.epoch.env.sat": 0.05357142857142857, "success_rate.epoch.env.science": 0.9438832772166106, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6398212792076211, "success_rate.epoch.global": 0.8382299601015597, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9930921052631579, "tokens_p.mean_below_band": 3.213062882423401e-08, "tokens_p.mean_in_band": 0.6282552083333334, "tokens_rate.above_band": 0.9193548387096774, "tokens_rate.below_band": 0.0032258064516129032, "tokens_rate.in_band": 0.07741935483870968 }, { "epoch": 1.2558178191489362, "grad_norm": 62.30315013495547, "learning_rate": 1.726247278427072e-07, "loss": 0.318, "step": 7555, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4246575342465753, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5679012345679012, "success_rate.epoch.env.math": 0.9650537634408602, "success_rate.epoch.env.sat": 0.05357142857142857, "success_rate.epoch.env.science": 0.9441028507546115, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.639980028224861, "success_rate.epoch.global": 0.838814600650524, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947129909365559, "tokens_p.mean_in_band": 0.839111328125, "tokens_rate.above_band": 0.976401179941003, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02359882005899705 }, { "epoch": 1.2566489361702127, "grad_norm": 132.70108707197568, "learning_rate": 1.7259996818915436e-07, "loss": 0.4093, "step": 7560, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4189189189189189, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5661538461538461, "success_rate.epoch.env.math": 0.9651474530831099, "success_rate.epoch.env.sat": 0.05357142857142857, "success_rate.epoch.env.science": 0.9437012263099219, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6392714883572108, "success_rate.epoch.global": 0.8381981981981982, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9995865490628445, "tokens_p.mean_below_band": 9.255018085241318e-09, "tokens_p.mean_in_band": 0.6165114182692307, "tokens_rate.above_band": 0.9710920770877944, "tokens_rate.below_band": 0.0010706638115631692, "tokens_rate.in_band": 0.027837259100642397 }, { "epoch": 1.2574800531914894, "grad_norm": 79.06175651425075, "learning_rate": 1.7257522166884506e-07, "loss": 0.2467, "step": 7565, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.41333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5674846625766872, "success_rate.epoch.env.math": 0.9652406417112299, "success_rate.epoch.env.sat": 0.05309734513274336, "success_rate.epoch.env.science": 0.943301834352418, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6388137559998642, "success_rate.epoch.global": 0.8376436781609196, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5599999999999999, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.995434131736527, "tokens_p.mean_in_band": 0.6672623005319149, "tokens_rate.above_band": 0.9467120181405896, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05328798185941043 }, { "epoch": 1.258311170212766, "grad_norm": 144.34217262881566, "learning_rate": 1.725504883256983e-07, "loss": 0.6389, "step": 7570, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.41333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5074626865671642, "success_rate.epoch.env.logic": 0.5657492354740061, "success_rate.epoch.env.math": 0.9653333333333334, "success_rate.epoch.env.sat": 0.05309734513274336, "success_rate.epoch.env.science": 0.9434276206322796, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6386758515270718, "success_rate.epoch.global": 0.8376344086021505, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971198156682027, "tokens_p.mean_in_band": 0.5610795454545454, "tokens_rate.above_band": 0.987485779294653, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012514220705346985 }, { "epoch": 1.2591422872340425, "grad_norm": 26.522834709665652, "learning_rate": 1.7252576820360972e-07, "loss": 0.2027, "step": 7575, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.41333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5657492354740061, "success_rate.epoch.env.math": 0.9654255319148937, "success_rate.epoch.env.sat": 0.05309734513274336, "success_rate.epoch.env.science": 0.9430624654505252, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6379726112391301, "success_rate.epoch.global": 0.8373838456040028, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9924933274021353, "tokens_p.mean_below_band": 5.400124791776761e-13, "tokens_p.mean_in_band": 0.8057914402173914, "tokens_rate.above_band": 0.9598633646456021, "tokens_rate.below_band": 0.0008539709649871904, "tokens_rate.in_band": 0.03928266438941076 }, { "epoch": 1.2599734042553192, "grad_norm": 108.90530153265249, "learning_rate": 1.7250106134645148e-07, "loss": 0.3732, "step": 7580, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.41333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5670731707317073, "success_rate.epoch.env.math": 0.9656992084432717, "success_rate.epoch.env.sat": 0.05309734513274336, "success_rate.epoch.env.science": 0.9431880860452289, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6381292687282922, "success_rate.epoch.global": 0.837847469707769, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.991875, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.9900990099009901, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009900990099009901 }, { "epoch": 1.2608045212765957, "grad_norm": 181.54067229850443, "learning_rate": 1.724763677980722e-07, "loss": 0.4089, "step": 7585, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.41333333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5670731707317073, "success_rate.epoch.env.math": 0.9657894736842105, "success_rate.epoch.env.sat": 0.05263157894736842, "success_rate.epoch.env.science": 0.942888522789676, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.638067899255566, "success_rate.epoch.global": 0.8377130681818182, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995, "tokens_p.mean_below_band": 2.7830537874251604e-10, "tokens_p.mean_in_band": 0.6794704861111112, "tokens_rate.above_band": 0.8875739644970414, "tokens_rate.below_band": 0.005917159763313609, "tokens_rate.in_band": 0.10650887573964497 }, { "epoch": 1.2616356382978724, "grad_norm": 87.58291830234873, "learning_rate": 1.7245168760229688e-07, "loss": 0.3688, "step": 7590, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.42105263157894735, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5666666666666667, "success_rate.epoch.env.math": 0.9659685863874345, "success_rate.epoch.env.sat": 0.05263157894736842, "success_rate.epoch.env.science": 0.943013698630137, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6387603613214076, "success_rate.epoch.global": 0.8378761061946902, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994077480490524, "tokens_p.mean_in_band": 0.5328125, "tokens_rate.above_band": 0.9781897491821155, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021810250817884406 }, { "epoch": 1.2624667553191489, "grad_norm": 313.01757503628585, "learning_rate": 1.7242702080292678e-07, "loss": 0.3031, "step": 7595, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.42105263157894735, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5666666666666667, "success_rate.epoch.env.math": 0.9659685863874345, "success_rate.epoch.env.sat": 0.06086956521739131, "success_rate.epoch.env.science": 0.9426229508196722, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6394737466359128, "success_rate.epoch.global": 0.8378664782762275, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9914398923444976, "tokens_p.mean_below_band": 5.997717380523682e-07, "tokens_p.mean_in_band": 0.5068672558309038, "tokens_rate.above_band": 0.8293650793650794, "tokens_rate.below_band": 0.000496031746031746, "tokens_rate.in_band": 0.1701388888888889 }, { "epoch": 1.2632978723404256, "grad_norm": 88.11740418792925, "learning_rate": 1.7240236744373946e-07, "loss": 0.3213, "step": 7600, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.42105263157894735, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5666666666666667, "success_rate.epoch.env.math": 0.9660574412532638, "success_rate.epoch.env.sat": 0.06779661016949153, "success_rate.epoch.env.science": 0.9417211328976035, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6400295722628093, "success_rate.epoch.global": 0.837029215065118, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.3333333333333333, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9955179282868526, "tokens_p.mean_below_band": 4.6798959374427795e-08, "tokens_p.mean_in_band": 0.6223958333333334, "tokens_rate.above_band": 0.8366666666666667, "tokens_rate.below_band": 0.0033333333333333335, "tokens_rate.in_band": 0.16 }, { "epoch": 1.2641289893617023, "grad_norm": 60.223873615188474, "learning_rate": 1.723777275684886e-07, "loss": 0.2599, "step": 7605, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.42105263157894735, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5649546827794562, "success_rate.epoch.env.math": 0.9662337662337662, "success_rate.epoch.env.sat": 0.06779661016949153, "success_rate.epoch.env.science": 0.9418162044589451, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6398986097768669, "success_rate.epoch.global": 0.837021426062522, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9910416666666667, "tokens_p.mean_in_band": 0.525390625, "tokens_rate.above_band": 0.9868421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013157894736842105 }, { "epoch": 1.2649601063829787, "grad_norm": 75.10334585640015, "learning_rate": 1.7235310122090397e-07, "loss": 0.2009, "step": 7610, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.42105263157894735, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5649546827794562, "success_rate.epoch.env.math": 0.9664082687338501, "success_rate.epoch.env.sat": 0.06722689075630252, "success_rate.epoch.env.science": 0.9420368364030336, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6398827384160473, "success_rate.epoch.global": 0.8372418620931047, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9904141104294478, "tokens_p.mean_in_band": 0.6107954545454546, "tokens_rate.above_band": 0.8810810810810811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11891891891891893 }, { "epoch": 1.2657912234042552, "grad_norm": 89.47123789519968, "learning_rate": 1.7232848844469122e-07, "loss": 0.2706, "step": 7615, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5615615615615616, "success_rate.epoch.env.math": 0.9665809768637532, "success_rate.epoch.env.sat": 0.06722689075630252, "success_rate.epoch.env.science": 0.9421308815575987, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6391014126044147, "success_rate.epoch.global": 0.8366492146596859, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.999929084720121, "tokens_p.mean_in_band": 0.62646484375, "tokens_rate.above_band": 0.9821693907875185, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017830609212481426 }, { "epoch": 1.266622340425532, "grad_norm": 134.08973388186456, "learning_rate": 1.7230388928353205e-07, "loss": 0.364, "step": 7620, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5611940298507463, "success_rate.epoch.env.math": 0.9666666666666667, "success_rate.epoch.env.sat": 0.06722689075630252, "success_rate.epoch.env.science": 0.9422869471413161, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6390899783931252, "success_rate.epoch.global": 0.8367560041768186, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9997344192634561, "tokens_p.mean_in_band": 0.7513020833333334, "tokens_rate.above_band": 0.9631650750341064, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036834924965893585 }, { "epoch": 1.2674534574468086, "grad_norm": 39.52536572473698, "learning_rate": 1.7227930378108399e-07, "loss": 0.4201, "step": 7625, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5625, "success_rate.epoch.env.math": 0.9670050761421319, "success_rate.epoch.env.sat": 0.06722689075630252, "success_rate.epoch.env.science": 0.942380183091007, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6392479434453443, "success_rate.epoch.global": 0.8372093023255814, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9931175595238095, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.9824561403508771, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017543859649122806 }, { "epoch": 1.268284574468085, "grad_norm": 67.08506398746117, "learning_rate": 1.7225473198098018e-07, "loss": 0.5256, "step": 7630, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.7142857142857143, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4857142857142857, "success_rate.epoch.env.logic": 0.5608308605341247, "success_rate.epoch.env.math": 0.9670886075949368, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9425040300913488, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6377654253191553, "success_rate.epoch.global": 0.8363321799307959, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9917110874200427, "tokens_p.mean_below_band": 3.9301812648773193e-07, "tokens_p.mean_in_band": 0.5827058232931727, "tokens_rate.above_band": 0.8245428973277075, "tokens_rate.below_band": 0.00035161744022503517, "tokens_rate.in_band": 0.1751054852320675 }, { "epoch": 1.2691156914893618, "grad_norm": 132.7669125484632, "learning_rate": 1.722301739268296e-07, "loss": 0.3246, "step": 7635, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4857142857142857, "success_rate.epoch.env.logic": 0.5608308605341247, "success_rate.epoch.env.math": 0.9671717171717171, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9426888055704339, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.629672895389169, "success_rate.epoch.global": 0.8364389233954451, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9898127753303965, "tokens_p.mean_in_band": 0.648046875, "tokens_rate.above_band": 0.850187265917603, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.149812734082397 }, { "epoch": 1.2699468085106382, "grad_norm": 39.39159648925311, "learning_rate": 1.7220562966221682e-07, "loss": 0.3642, "step": 7640, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4857142857142857, "success_rate.epoch.env.logic": 0.5591715976331361, "success_rate.epoch.env.math": 0.9671717171717171, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9429028815368197, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6295415147587506, "success_rate.epoch.global": 0.8365450791465933, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973731884057971, "tokens_p.mean_in_band": 0.6219618055555556, "tokens_rate.above_band": 0.9745762711864406, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025423728813559324 }, { "epoch": 1.270777925531915, "grad_norm": 69.6753289548881, "learning_rate": 1.721810992307018e-07, "loss": 0.3274, "step": 7645, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4857142857142857, "success_rate.epoch.env.logic": 0.5591715976331361, "success_rate.epoch.env.math": 0.9672544080604534, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9430851063829787, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6295655980073774, "success_rate.epoch.global": 0.8369378647442499, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9916857798165137, "tokens_p.mean_in_band": 0.7927734375, "tokens_rate.above_band": 0.956140350877193, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043859649122807015 }, { "epoch": 1.2716090425531914, "grad_norm": 89.27030382498833, "learning_rate": 1.7215658267582013e-07, "loss": 0.3655, "step": 7650, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4857142857142857, "success_rate.epoch.env.logic": 0.56047197640118, "success_rate.epoch.env.math": 0.9675, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9431455897980872, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6297116392912593, "success_rate.epoch.global": 0.8372730387118876, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9917553191489362, "tokens_p.mean_below_band": 2.648448571562767e-09, "tokens_p.mean_in_band": 0.5703125, "tokens_rate.above_band": 0.9873949579831933, "tokens_rate.below_band": 0.004201680672268907, "tokens_rate.in_band": 0.008403361344537815 }, { "epoch": 1.272440159574468, "grad_norm": 200.56429071306124, "learning_rate": 1.7213208004108268e-07, "loss": 0.5054, "step": 7655, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4788732394366197, "success_rate.epoch.env.logic": 0.56047197640118, "success_rate.epoch.env.math": 0.9675, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9433562731604024, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6291088790262273, "success_rate.epoch.global": 0.8373761530577383, "success_rate.window.env.ded": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9961345665066366, "tokens_p.mean_in_band": 0.5514420503211992, "tokens_rate.above_band": 0.8834830339321357, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11651696606786427 }, { "epoch": 1.2732712765957448, "grad_norm": 92.1135032399493, "learning_rate": 1.7210759136997557e-07, "loss": 0.3051, "step": 7660, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4861111111111111, "success_rate.epoch.env.logic": 0.5588235294117647, "success_rate.epoch.env.math": 0.9650872817955112, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9435058078141499, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6294112645838941, "success_rate.epoch.global": 0.8371379897785349, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9925431711145997, "tokens_p.mean_below_band": 2.8405338525772095e-08, "tokens_p.mean_in_band": 0.775, "tokens_rate.above_band": 0.9762452107279693, "tokens_rate.below_band": 0.0007662835249042146, "tokens_rate.in_band": 0.022988505747126436 }, { "epoch": 1.2741023936170213, "grad_norm": 65.59703990279134, "learning_rate": 1.7208311670596026e-07, "loss": 0.4438, "step": 7665, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4861111111111111, "success_rate.epoch.env.logic": 0.5568513119533528, "success_rate.epoch.env.math": 0.9653465346534653, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9435951502372166, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6292636625677676, "success_rate.epoch.global": 0.8369565217391305, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982142857142857, "tokens_p.mean_in_band": 0.59442138671875, "tokens_rate.above_band": 0.9745425616547335, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02545743834526651 }, { "epoch": 1.2749335106382977, "grad_norm": 19.351340735188106, "learning_rate": 1.7205865609247322e-07, "loss": 0.2492, "step": 7670, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4861111111111111, "success_rate.epoch.env.logic": 0.5568513119533528, "success_rate.epoch.env.math": 0.9654320987654321, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9437434279705573, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.629284920917341, "success_rate.epoch.global": 0.8372881355932204, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947996918335902, "tokens_p.mean_in_band": 0.7075520833333333, "tokens_rate.above_band": 0.9774096385542169, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022590361445783132 }, { "epoch": 1.2757646276595744, "grad_norm": 17.405691056369445, "learning_rate": 1.720342095729261e-07, "loss": 0.2285, "step": 7675, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4861111111111111, "success_rate.epoch.env.logic": 0.5594202898550724, "success_rate.epoch.env.math": 0.9656862745098039, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9438614900314796, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6295523041634332, "success_rate.epoch.global": 0.8377830348090571, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960477941176471, "tokens_p.mean_in_band": 0.7702414772727273, "tokens_rate.above_band": 0.9392265193370166, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06077348066298342 }, { "epoch": 1.2765957446808511, "grad_norm": 71.67340897541226, "learning_rate": 1.7200977719070542e-07, "loss": 0.2461, "step": 7680, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4155844155844156, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4861111111111111, "success_rate.epoch.env.logic": 0.5578034682080925, "success_rate.epoch.env.math": 0.9656862745098039, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9435736677115988, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6293791547119004, "success_rate.epoch.global": 0.8376010781671159, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9964788732394366, "tokens_p.mean_below_band": 2.4158453015843406e-12, "tokens_p.mean_in_band": 0.6385216346153846, "tokens_rate.above_band": 0.9785604900459418, "tokens_rate.below_band": 0.0015313935681470138, "tokens_rate.in_band": 0.019908116385911178 }, { "epoch": 1.2774268617021276, "grad_norm": 124.24369516077107, "learning_rate": 1.7198535898917274e-07, "loss": 0.459, "step": 7685, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4861111111111111, "success_rate.epoch.env.logic": 0.5578034682080925, "success_rate.epoch.env.math": 0.9658536585365853, "success_rate.epoch.env.sat": 0.06666666666666667, "success_rate.epoch.env.science": 0.9437206878582595, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6300888739542594, "success_rate.epoch.global": 0.8380376344086021, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9997616323417239, "tokens_p.mean_in_band": 0.5889269770408163, "tokens_rate.above_band": 0.9639705882352941, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03602941176470588 }, { "epoch": 1.2782579787234043, "grad_norm": 222.10974680153734, "learning_rate": 1.7196095501166434e-07, "loss": 0.2625, "step": 7690, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4861111111111111, "success_rate.epoch.env.logic": 0.5590778097982709, "success_rate.epoch.env.math": 0.9658536585365853, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.9438961038961039, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.630170582448868, "success_rate.epoch.global": 0.8381367292225201, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9957170050761421, "tokens_p.mean_below_band": 3.213062882423401e-08, "tokens_p.mean_in_band": 0.6052517361111112, "tokens_rate.above_band": 0.9120370370370371, "tokens_rate.below_band": 0.004629629629629629, "tokens_rate.in_band": 0.08333333333333333 }, { "epoch": 1.2790890957446808, "grad_norm": 49.216118031609234, "learning_rate": 1.7193656530149132e-07, "loss": 0.3442, "step": 7695, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4861111111111111, "success_rate.epoch.env.logic": 0.5590778097982709, "success_rate.epoch.env.math": 0.9659367396593674, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.944070429829104, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6301939830903028, "success_rate.epoch.global": 0.8385155466399198, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9903621495327103, "tokens_p.mean_in_band": 0.8419471153846154, "tokens_rate.above_band": 0.8916666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10833333333333334 }, { "epoch": 1.2799202127659575, "grad_norm": 157.99224406914018, "learning_rate": 1.7191218990193946e-07, "loss": 0.3083, "step": 7700, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.4230769230769231, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4931506849315068, "success_rate.epoch.env.logic": 0.5590778097982709, "success_rate.epoch.env.math": 0.9661016949152542, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.9442436757873, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6308646899116191, "success_rate.epoch.global": 0.839, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9886363636363636, "tokens_p.mean_in_band": 0.8642578125, "tokens_rate.above_band": 0.9929203539823008, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007079646017699115 }, { "epoch": 1.280751329787234, "grad_norm": 138.4902797807258, "learning_rate": 1.718878288562691e-07, "loss": 0.2881, "step": 7705, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4931506849315068, "success_rate.epoch.env.logic": 0.5574712643678161, "success_rate.epoch.env.math": 0.9661016949152542, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.94390118373649, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6313513977550058, "success_rate.epoch.global": 0.8387632978723404, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970034246575342, "tokens_p.mean_in_band": 0.58515625, "tokens_rate.above_band": 0.9776785714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022321428571428572 }, { "epoch": 1.2815824468085106, "grad_norm": 85.17321046800986, "learning_rate": 1.718634822077151e-07, "loss": 0.3031, "step": 7710, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4931506849315068, "success_rate.epoch.env.logic": 0.5574712643678161, "success_rate.epoch.env.math": 0.9663461538461539, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.9440164355418593, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6313840987310303, "success_rate.epoch.global": 0.8391376451077943, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956825657894737, "tokens_p.mean_in_band": 0.6919642857142857, "tokens_rate.above_band": 0.9156626506024096, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08433734939759036 }, { "epoch": 1.2824135638297873, "grad_norm": 51.72451180842357, "learning_rate": 1.7183914999948683e-07, "loss": 0.2251, "step": 7715, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5587392550143266, "success_rate.epoch.env.math": 0.9664268585131894, "success_rate.epoch.env.sat": 0.06611570247933884, "success_rate.epoch.env.science": 0.9441598360655737, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6321424088133716, "success_rate.epoch.global": 0.8395633476678795, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9919217687074829, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9932432432432432, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006756756756756757 }, { "epoch": 1.2832446808510638, "grad_norm": 104.89270597254156, "learning_rate": 1.7181483227476802e-07, "loss": 0.2516, "step": 7720, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5587392550143266, "success_rate.epoch.env.math": 0.9665871121718377, "success_rate.epoch.env.sat": 0.06504065040650407, "success_rate.epoch.env.science": 0.9442740286298569, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6320696264633804, "success_rate.epoch.global": 0.8393269548003959, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.992218875502008, "tokens_p.mean_in_band": 0.6833984375, "tokens_rate.above_band": 0.8615916955017301, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1384083044982699 }, { "epoch": 1.2840757978723405, "grad_norm": 116.62884366560398, "learning_rate": 1.717905290767166e-07, "loss": 0.3153, "step": 7725, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.56, "success_rate.epoch.env.math": 0.9643705463182898, "success_rate.epoch.env.sat": 0.06451612903225806, "success_rate.epoch.env.science": 0.9443593670239918, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6319428079317453, "success_rate.epoch.global": 0.8390388413429888, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.75, "tokens_p.mean_above_band": 0.9918870192307693, "tokens_p.mean_below_band": 3.0547380447387695e-07, "tokens_p.mean_in_band": 0.5863486842105263, "tokens_rate.above_band": 0.9122807017543859, "tokens_rate.below_band": 0.0043859649122807015, "tokens_rate.in_band": 0.08333333333333333 }, { "epoch": 1.284906914893617, "grad_norm": 61.1916645236904, "learning_rate": 1.7176624044846485e-07, "loss": 0.2892, "step": 7730, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5612535612535613, "success_rate.epoch.env.math": 0.964622641509434, "success_rate.epoch.env.sat": 0.06451612903225806, "success_rate.epoch.env.science": 0.9444444444444444, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.632087420101305, "success_rate.epoch.global": 0.8394088669950739, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964334239130435, "tokens_p.mean_in_band": 0.6231770833333333, "tokens_rate.above_band": 0.9608355091383812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0391644908616188 }, { "epoch": 1.2857380319148937, "grad_norm": 97.88755402057637, "learning_rate": 1.7174196643311917e-07, "loss": 0.3285, "step": 7735, "success_rate.epoch.env.abd": 0.5294117647058824, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5625, "success_rate.epoch.env.math": 0.964622641509434, "success_rate.epoch.env.sat": 0.06451612903225806, "success_rate.epoch.env.science": 0.9446138211382114, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6322161305958692, "success_rate.epoch.global": 0.8397771952817824, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9937169312169312, "tokens_p.mean_in_band": 0.7921875, "tokens_rate.above_band": 0.9742268041237113, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02577319587628866 }, { "epoch": 1.2865691489361701, "grad_norm": 84.25347909750369, "learning_rate": 1.7171770707375992e-07, "loss": 0.3818, "step": 7740, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5625, "success_rate.epoch.env.math": 0.9647887323943662, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.9446981227803145, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6294719998941383, "success_rate.epoch.global": 0.8392156862745098, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9901944294940797, "tokens_p.mean_in_band": 0.5656698258196722, "tokens_rate.above_band": 0.8839200761179828, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11607992388201713 }, { "epoch": 1.2874002659574468, "grad_norm": 34.29660852994889, "learning_rate": 1.7169346241344153e-07, "loss": 0.3328, "step": 7745, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5649717514124294, "success_rate.epoch.env.math": 0.9647887323943662, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.9447541814495691, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.629701800810655, "success_rate.epoch.global": 0.839425587467363, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987560926076361, "tokens_p.mean_in_band": 0.6353125, "tokens_rate.above_band": 0.9800955414012739, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019904458598726114 }, { "epoch": 1.2882313829787235, "grad_norm": 33.844727744307455, "learning_rate": 1.7166923249519237e-07, "loss": 0.2749, "step": 7750, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5649717514124294, "success_rate.epoch.env.math": 0.9648711943793911, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.944949494949495, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6297270531274686, "success_rate.epoch.global": 0.83984375, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9987765089722676, "tokens_p.mean_in_band": 0.7222222222222222, "tokens_rate.above_band": 0.9855305466237942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014469453376205787 }, { "epoch": 1.2890625, "grad_norm": 45.359782350325666, "learning_rate": 1.7164501736201456e-07, "loss": 0.2702, "step": 7755, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5633802816901409, "success_rate.epoch.env.math": 0.9649532710280374, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.9451158106747231, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6296049551867037, "success_rate.epoch.global": 0.839935064935065, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999537037037037, "tokens_p.mean_in_band": 0.4560546875, "tokens_rate.above_band": 0.9768451519536903, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023154848046309694 }, { "epoch": 1.2898936170212765, "grad_norm": 74.988351784935, "learning_rate": 1.7162081705688406e-07, "loss": 0.2365, "step": 7760, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5646067415730337, "success_rate.epoch.env.math": 0.965034965034965, "success_rate.epoch.env.sat": 0.06349206349206349, "success_rate.epoch.env.science": 0.9451710261569416, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6297288978568891, "success_rate.epoch.global": 0.8401426718547341, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9925115207373272, "tokens_p.mean_in_band": 0.8139204545454546, "tokens_rate.above_band": 0.9517543859649122, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04824561403508772 }, { "epoch": 1.2907247340425532, "grad_norm": 56.988295766203706, "learning_rate": 1.715966316227505e-07, "loss": 0.3599, "step": 7765, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5646067415730337, "success_rate.epoch.env.math": 0.9651972157772621, "success_rate.epoch.env.sat": 0.06299212598425197, "success_rate.epoch.env.science": 0.9453634085213033, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6297156883658752, "success_rate.epoch.global": 0.8403361344537815, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940119760479041, "tokens_p.mean_in_band": 0.6, "tokens_rate.above_band": 0.9175824175824175, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08241758241758242 }, { "epoch": 1.2915558510638299, "grad_norm": 35.824128708039495, "learning_rate": 1.7157246110253704e-07, "loss": 0.2489, "step": 7770, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5630252100840336, "success_rate.epoch.env.math": 0.9651972157772621, "success_rate.epoch.env.sat": 0.06299212598425197, "success_rate.epoch.env.science": 0.945, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6295388756376658, "success_rate.epoch.global": 0.84, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939577039274925, "tokens_p.mean_in_band": 0.6379024621212122, "tokens_rate.above_band": 0.9525179856115108, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04748201438848921 }, { "epoch": 1.2923869680851063, "grad_norm": 45.869845240653746, "learning_rate": 1.715483055391405e-07, "loss": 0.2914, "step": 7775, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.3333333333333333, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5654596100278552, "success_rate.epoch.env.math": 0.9652777777777778, "success_rate.epoch.env.sat": 0.0625, "success_rate.epoch.env.science": 0.945082376435347, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.629730258582705, "success_rate.epoch.global": 0.8400386224654007, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949383802816901, "tokens_p.mean_in_band": 0.7193667763157895, "tokens_rate.above_band": 0.9372937293729373, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0627062706270627 }, { "epoch": 1.293218085106383, "grad_norm": 45.28572513944938, "learning_rate": 1.7152416497543103e-07, "loss": 0.3758, "step": 7780, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5654596100278552, "success_rate.epoch.env.math": 0.9653579676674365, "success_rate.epoch.env.sat": 0.06201550387596899, "success_rate.epoch.env.science": 0.944748631159781, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6448146781424078, "success_rate.epoch.global": 0.8398587933247753, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9955654101995566, "tokens_p.mean_in_band": 0.7157118055555556, "tokens_rate.above_band": 0.9435146443514645, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056485355648535567 }, { "epoch": 1.2940492021276595, "grad_norm": 70.33801699162333, "learning_rate": 1.7150003945425226e-07, "loss": 0.2366, "step": 7785, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43037974683544306, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5666666666666667, "success_rate.epoch.env.math": 0.9655963302752294, "success_rate.epoch.env.sat": 0.06201550387596899, "success_rate.epoch.env.science": 0.9448857994041708, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6449585497324981, "success_rate.epoch.global": 0.84032, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9905092592592593, "tokens_p.mean_in_band": 0.896484375, "tokens_rate.above_band": 0.9926470588235294, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007352941176470588 }, { "epoch": 1.2948803191489362, "grad_norm": 40.558210758108984, "learning_rate": 1.7147592901842106e-07, "loss": 0.2904, "step": 7790, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5678670360110804, "success_rate.epoch.env.math": 0.9659863945578231, "success_rate.epoch.env.sat": 0.06201550387596899, "success_rate.epoch.env.science": 0.9449677739216659, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6457578825787762, "success_rate.epoch.global": 0.840829346092504, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973958333333334, "tokens_p.mean_in_band": 0.697265625, "tokens_rate.above_band": 0.993103448275862, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006896551724137931 }, { "epoch": 1.2957114361702127, "grad_norm": 183.04553545185934, "learning_rate": 1.7145183371072755e-07, "loss": 0.2811, "step": 7795, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5686813186813187, "success_rate.epoch.env.math": 0.9659863945578231, "success_rate.epoch.env.sat": 0.06153846153846154, "success_rate.epoch.env.science": 0.9450766947055913, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6457984426802904, "success_rate.epoch.global": 0.8405981546293351, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971991037131882, "tokens_p.mean_in_band": 0.6282958984375, "tokens_rate.above_band": 0.9606396063960639, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03936039360393604 }, { "epoch": 1.2965425531914894, "grad_norm": 35.244866382635706, "learning_rate": 1.7142775357393502e-07, "loss": 0.4762, "step": 7800, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5683060109289617, "success_rate.epoch.env.math": 0.9659863945578231, "success_rate.epoch.env.sat": 0.06153846153846154, "success_rate.epoch.env.science": 0.9451581027667985, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6457717245265495, "success_rate.epoch.global": 0.8405336721728082, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.25, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4166666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9996878286014721, "tokens_p.mean_in_band": 0.5009581367924528, "tokens_rate.above_band": 0.9728900255754476, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02710997442455243 }, { "epoch": 1.297373670212766, "grad_norm": 18.618669521771686, "learning_rate": 1.7140368865077978e-07, "loss": 0.3719, "step": 7805, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5694822888283378, "success_rate.epoch.env.math": 0.9659863945578231, "success_rate.epoch.env.sat": 0.06153846153846154, "success_rate.epoch.env.science": 0.945320197044335, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6458933947244506, "success_rate.epoch.global": 0.8408874801901743, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9897119341563786, "tokens_p.mean_in_band": 0.7223074776785714, "tokens_rate.above_band": 0.8966789667896679, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1033210332103321 }, { "epoch": 1.2982047872340425, "grad_norm": 201.23196672066155, "learning_rate": 1.7137963898397115e-07, "loss": 0.2532, "step": 7810, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5694822888283378, "success_rate.epoch.env.math": 0.9662162162162162, "success_rate.epoch.env.sat": 0.061068702290076333, "success_rate.epoch.env.science": 0.9454813359528488, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6458862312079527, "success_rate.epoch.global": 0.8410742496050553, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9927147239263804, "tokens_p.mean_in_band": 0.6484375, "tokens_rate.above_band": 0.9106145251396648, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0893854748603352 }, { "epoch": 1.2990359042553192, "grad_norm": 54.528420854433485, "learning_rate": 1.7135560461619146e-07, "loss": 0.2778, "step": 7815, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5679347826086957, "success_rate.epoch.env.math": 0.9662162162162162, "success_rate.epoch.env.sat": 0.061068702290076333, "success_rate.epoch.env.science": 0.94564152791381, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6457601117298908, "success_rate.epoch.global": 0.8411097099621689, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9998310810810811, "tokens_p.mean_below_band": 1.0277290130034089e-10, "tokens_p.mean_in_band": 0.4642269736842105, "tokens_rate.above_band": 0.9736842105263158, "tokens_rate.below_band": 0.0013157894736842105, "tokens_rate.in_band": 0.025 }, { "epoch": 1.2998670212765957, "grad_norm": 103.26100128942463, "learning_rate": 1.713315855900958e-07, "loss": 0.3334, "step": 7820, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5702702702702702, "success_rate.epoch.env.math": 0.9662162162162162, "success_rate.epoch.env.sat": 0.07462686567164178, "success_rate.epoch.env.science": 0.9457478005865103, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6472146502495125, "success_rate.epoch.global": 0.8412448915435398, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936746987951808, "tokens_p.mean_in_band": 0.6223958333333334, "tokens_rate.above_band": 0.9518348623853211, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0481651376146789 }, { "epoch": 1.3006981382978724, "grad_norm": 71.61746023229394, "learning_rate": 1.7130758194831202e-07, "loss": 0.3164, "step": 7825, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4375, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.568733153638814, "success_rate.epoch.env.math": 0.9662162162162162, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.9458800585080449, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6470366820397409, "success_rate.epoch.global": 0.8409661229611042, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9961166471277842, "tokens_p.mean_in_band": 0.5823863636363636, "tokens_rate.above_band": 0.9627539503386005, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03724604966139955 }, { "epoch": 1.3015292553191489, "grad_norm": 136.2477937279116, "learning_rate": 1.7128359373344077e-07, "loss": 0.4513, "step": 7830, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5710455764075067, "success_rate.epoch.env.math": 0.9662162162162162, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.9454722492697176, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.647235081249572, "success_rate.epoch.global": 0.8406885758998435, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986328591919675, "tokens_p.mean_below_band": 2.7120113372802733e-07, "tokens_p.mean_in_band": 0.48908617424242423, "tokens_rate.above_band": 0.9609464736963014, "tokens_rate.below_band": 0.0011486331265793705, "tokens_rate.in_band": 0.03790489317711923 }, { "epoch": 1.3023603723404256, "grad_norm": 43.5416123814797, "learning_rate": 1.712596209880552e-07, "loss": 0.3195, "step": 7835, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5721925133689839, "success_rate.epoch.env.math": 0.9662921348314607, "success_rate.epoch.env.sat": 0.07407407407407407, "success_rate.epoch.env.science": 0.9456574478408539, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6473630861721046, "success_rate.epoch.global": 0.8411360799001248, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994688385269122, "tokens_p.mean_in_band": 0.6610054347826086, "tokens_rate.above_band": 0.9684499314128944, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03155006858710562 }, { "epoch": 1.3031914893617023, "grad_norm": 124.36568743877578, "learning_rate": 1.712356637547011e-07, "loss": 0.3671, "step": 7840, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5706666666666667, "success_rate.epoch.env.math": 0.9662921348314607, "success_rate.epoch.env.sat": 0.07352941176470588, "success_rate.epoch.env.science": 0.94581519109821, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6471891983762564, "success_rate.epoch.global": 0.8409090909090909, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974850968703428, "tokens_p.mean_in_band": 0.5109375, "tokens_rate.above_band": 0.9437412095639943, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05625879043600562 }, { "epoch": 1.3040226063829787, "grad_norm": 83.12046904820303, "learning_rate": 1.7121172207589668e-07, "loss": 0.2877, "step": 7845, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5740740740740741, "success_rate.epoch.env.math": 0.9662921348314607, "success_rate.epoch.env.sat": 0.07971014492753623, "success_rate.epoch.env.science": 0.9454106280193236, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6480240690572884, "success_rate.epoch.global": 0.8406832298136646, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9936061381074168, "tokens_p.mean_in_band": 0.6761363636363636, "tokens_rate.above_band": 0.9467312348668281, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.053268765133171914 }, { "epoch": 1.3048537234042552, "grad_norm": 564.5812448606076, "learning_rate": 1.7118779599413248e-07, "loss": 0.2138, "step": 7850, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5740740740740741, "success_rate.epoch.env.math": 0.9663677130044843, "success_rate.epoch.env.sat": 0.07971014492753623, "success_rate.epoch.env.science": 0.9455684007707129, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6480452827776896, "success_rate.epoch.global": 0.8410288193368454, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978693181818182, "tokens_p.mean_in_band": 0.68359375, "tokens_rate.above_band": 0.9806835066864784, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019316493313521546 }, { "epoch": 1.305684840425532, "grad_norm": 70.87256434077706, "learning_rate": 1.7116388555187155e-07, "loss": 0.1972, "step": 7855, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.575197889182058, "success_rate.epoch.env.math": 0.9663677130044843, "success_rate.epoch.env.sat": 0.07913669064748201, "success_rate.epoch.env.science": 0.9456730769230769, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6481048315940799, "success_rate.epoch.global": 0.8410145375811939, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9939350328947368, "tokens_p.mean_in_band": 0.6740056818181818, "tokens_rate.above_band": 0.9325153374233128, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06748466257668712 }, { "epoch": 1.3065159574468086, "grad_norm": 119.48117115616687, "learning_rate": 1.71139990791549e-07, "loss": 0.3086, "step": 7860, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5774278215223098, "success_rate.epoch.env.math": 0.9663677130044843, "success_rate.epoch.env.sat": 0.07857142857142857, "success_rate.epoch.env.science": 0.9457773512476008, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6482656447385092, "success_rate.epoch.global": 0.8410493827160493, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936868686868687, "tokens_p.mean_in_band": 0.728125, "tokens_rate.above_band": 0.9369085173501577, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06309148264984227 }, { "epoch": 1.307347074468085, "grad_norm": 42.676069946877085, "learning_rate": 1.7111611175557222e-07, "loss": 0.3076, "step": 7865, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5774278215223098, "success_rate.epoch.env.math": 0.9663677130044843, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.945933014354067, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6481791923082757, "success_rate.epoch.global": 0.8408251231527094, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9925176056338029, "tokens_p.mean_in_band": 0.5514508928571429, "tokens_rate.above_band": 0.8588709677419355, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14112903225806453 }, { "epoch": 1.3081781914893618, "grad_norm": 0.0, "learning_rate": 1.710922484863206e-07, "loss": 0.266, "step": 7870, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.43902439024390244, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5785340314136126, "success_rate.epoch.env.math": 0.9664429530201343, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.9460877862595419, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6478079348184472, "success_rate.epoch.global": 0.8409579367516119, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9928925992779783, "tokens_p.mean_in_band": 0.580078125, "tokens_rate.above_band": 0.992831541218638, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007168458781362007 }, { "epoch": 1.3090093085106382, "grad_norm": 116.18762386106206, "learning_rate": 1.710684010261456e-07, "loss": 0.194, "step": 7875, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4457831325301205, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.577023498694517, "success_rate.epoch.env.math": 0.9664429530201343, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.9462160875773441, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6482967085352584, "success_rate.epoch.global": 0.8409926470588235, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976461038961039, "tokens_p.mean_in_band": 0.5659375, "tokens_rate.above_band": 0.9685534591194969, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.031446540880503145 }, { "epoch": 1.309840425531915, "grad_norm": 160.8591830017501, "learning_rate": 1.7104456941737065e-07, "loss": 0.4221, "step": 7880, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4457831325301205, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.577023498694517, "success_rate.epoch.env.math": 0.9666666666666667, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.9463946869070209, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6483332824421866, "success_rate.epoch.global": 0.8414783139890043, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9870495495495496, "tokens_p.mean_in_band": 0.8779296875, "tokens_rate.above_band": 0.9652173913043478, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034782608695652174 }, { "epoch": 1.3106715425531914, "grad_norm": 65.3715205302918, "learning_rate": 1.7102075370229101e-07, "loss": 0.2971, "step": 7885, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4457831325301205, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5755208333333334, "success_rate.epoch.env.math": 0.9666666666666667, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.9464962121212122, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6482059060651874, "success_rate.epoch.global": 0.841415065568771, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9926362297496318, "tokens_p.mean_in_band": 0.6958912037037037, "tokens_rate.above_band": 0.9617563739376771, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03824362606232295 }, { "epoch": 1.311502659574468, "grad_norm": 68.79483226085244, "learning_rate": 1.7099695392317374e-07, "loss": 0.296, "step": 7890, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5755208333333334, "success_rate.epoch.env.math": 0.9667405764966741, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.9466225791213982, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6483400849587769, "success_rate.epoch.global": 0.8414968055978096, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.995846313603323, "tokens_p.mean_in_band": 0.557421875, "tokens_rate.above_band": 0.9948347107438017, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005165289256198347 }, { "epoch": 1.3123337765957448, "grad_norm": 92.85074060125947, "learning_rate": 1.7097317012225757e-07, "loss": 0.2929, "step": 7895, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5755208333333334, "success_rate.epoch.env.math": 0.9668141592920354, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.946773433820066, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.648360488367325, "success_rate.epoch.global": 0.8418336369156041, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988155976676385, "tokens_p.mean_in_band": 0.5561810661764706, "tokens_rate.above_band": 0.9527777777777777, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04722222222222222 }, { "epoch": 1.3131648936170213, "grad_norm": 52.05855833697158, "learning_rate": 1.70949402341753e-07, "loss": 0.3807, "step": 7900, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49333333333333335, "success_rate.epoch.env.logic": 0.5755208333333334, "success_rate.epoch.env.math": 0.9668874172185431, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.9468984962406015, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6483785174897835, "success_rate.epoch.global": 0.8421212121212122, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9873991935483871, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.992, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008 }, { "epoch": 1.3139960106382977, "grad_norm": 127.58702723849014, "learning_rate": 1.7092565062384197e-07, "loss": 0.2928, "step": 7905, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4868421052631579, "success_rate.epoch.env.logic": 0.5755208333333334, "success_rate.epoch.env.math": 0.967032967032967, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.9469981238273921, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.647810694701696, "success_rate.epoch.global": 0.8421530087692773, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9993508026440038, "tokens_p.mean_in_band": 0.6882512019230769, "tokens_rate.above_band": 0.9878731343283582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012126865671641791 }, { "epoch": 1.3148271276595744, "grad_norm": 98.7251028939823, "learning_rate": 1.7090191501067795e-07, "loss": 0.28, "step": 7910, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4868421052631579, "success_rate.epoch.env.logic": 0.574025974025974, "success_rate.epoch.env.math": 0.9672489082969432, "success_rate.epoch.env.sat": 0.07746478873239436, "success_rate.epoch.env.science": 0.9471221338324755, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6477057030618506, "success_rate.epoch.global": 0.8422798552472859, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946205007824727, "tokens_p.mean_in_band": 0.5904947916666666, "tokens_rate.above_band": 0.9726027397260274, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0273972602739726 }, { "epoch": 1.3156582446808511, "grad_norm": 1014.5527849798041, "learning_rate": 1.7087819554438587e-07, "loss": 0.3109, "step": 7915, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4868421052631579, "success_rate.epoch.env.logic": 0.574025974025974, "success_rate.epoch.env.math": 0.9673202614379085, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.947196261682243, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6471971040407855, "success_rate.epoch.global": 0.8419626730885009, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9945469798657718, "tokens_p.mean_in_band": 0.6200086805555556, "tokens_rate.above_band": 0.9430379746835443, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056962025316455694 }, { "epoch": 1.3164893617021276, "grad_norm": 41.45903796314505, "learning_rate": 1.7085449226706193e-07, "loss": 0.3286, "step": 7920, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.625, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4805194805194805, "success_rate.epoch.env.logic": 0.5736434108527132, "success_rate.epoch.env.math": 0.9674620390455532, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9472947761194029, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6466093862342278, "success_rate.epoch.global": 0.8417892524767338, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975282485875706, "tokens_p.mean_below_band": 1.6916601452976465e-10, "tokens_p.mean_in_band": 0.7094907407407407, "tokens_rate.above_band": 0.9827267119062307, "tokens_rate.below_band": 0.0006169031462060457, "tokens_rate.in_band": 0.016656384947563233 }, { "epoch": 1.3173204787234043, "grad_norm": 133.09126217363126, "learning_rate": 1.7083080522077372e-07, "loss": 0.3157, "step": 7925, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.5747422680412371, "success_rate.epoch.env.math": 0.9675324675324676, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9469026548672567, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6409723613832146, "success_rate.epoch.global": 0.841521869382864, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980181747873164, "tokens_p.mean_below_band": 4.1443854570388794e-08, "tokens_p.mean_in_band": 0.5224358974358975, "tokens_rate.above_band": 0.8917241379310344, "tokens_rate.below_band": 0.000689655172413793, "tokens_rate.in_band": 0.10758620689655173 }, { "epoch": 1.3181515957446808, "grad_norm": 33.978257926965945, "learning_rate": 1.7080713444755986e-07, "loss": 0.2957, "step": 7930, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4482758620689655, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.5732647814910026, "success_rate.epoch.env.math": 0.967741935483871, "success_rate.epoch.env.sat": 0.07692307692307693, "success_rate.epoch.env.science": 0.9469767441860465, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.641447040262546, "success_rate.epoch.global": 0.8416019127316199, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969063845050216, "tokens_p.mean_below_band": 5.3085386753082275e-08, "tokens_p.mean_in_band": 0.589599609375, "tokens_rate.above_band": 0.9761904761904762, "tokens_rate.below_band": 0.0014005602240896359, "tokens_rate.in_band": 0.022408963585434174 }, { "epoch": 1.3189827127659575, "grad_norm": 144.45895246177582, "learning_rate": 1.7078347998943022e-07, "loss": 0.2749, "step": 7935, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4482758620689655, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.5732647814910026, "success_rate.epoch.env.math": 0.9678111587982833, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9471243042671614, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6444418429243157, "success_rate.epoch.global": 0.842026825633383, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9920280612244898, "tokens_p.mean_in_band": 0.651875, "tokens_rate.above_band": 0.9216300940438872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07836990595611286 }, { "epoch": 1.319813829787234, "grad_norm": 60.51122395493921, "learning_rate": 1.7075984188836562e-07, "loss": 0.3397, "step": 7940, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4431818181818182, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48717948717948717, "success_rate.epoch.env.logic": 0.5743589743589743, "success_rate.epoch.env.math": 0.9680170575692963, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9471733086190918, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6441013931155674, "success_rate.epoch.global": 0.8420582986317668, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9939769553072626, "tokens_p.mean_in_band": 0.73095703125, "tokens_rate.above_band": 0.988950276243094, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011049723756906077 }, { "epoch": 1.3206449468085106, "grad_norm": 62.699437822293184, "learning_rate": 1.7073622018631798e-07, "loss": 0.321, "step": 7945, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4431818181818182, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4936708860759494, "success_rate.epoch.env.logic": 0.5739795918367347, "success_rate.epoch.env.math": 0.9680851063829787, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9472954230235784, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6446743185330575, "success_rate.epoch.global": 0.8421833283892021, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9946737683089214, "tokens_p.mean_in_band": 0.7052017405063291, "tokens_rate.above_band": 0.9048192771084337, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09518072289156626 }, { "epoch": 1.3214760638297873, "grad_norm": 103.37139911912308, "learning_rate": 1.7071261492520996e-07, "loss": 0.3286, "step": 7950, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4431818181818182, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4936708860759494, "success_rate.epoch.env.logic": 0.5739795918367347, "success_rate.epoch.env.math": 0.9681528662420382, "success_rate.epoch.env.sat": 0.08275862068965517, "success_rate.epoch.env.science": 0.9474412171507607, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.644641485927836, "success_rate.epoch.global": 0.8422610239715892, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944957386363636, "tokens_p.mean_in_band": 0.6982421875, "tokens_rate.above_band": 0.9565217391304348, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043478260869565216 }, { "epoch": 1.3223071808510638, "grad_norm": 53.50656227034014, "learning_rate": 1.7068902614693514e-07, "loss": 0.4684, "step": 7955, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4431818181818182, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5750636132315522, "success_rate.epoch.env.math": 0.9682875264270613, "success_rate.epoch.env.sat": 0.0821917808219178, "success_rate.epoch.env.science": 0.9474654377880184, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6441419564981461, "success_rate.epoch.global": 0.8419497784342689, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9950044404973357, "tokens_p.mean_in_band": 0.6307641006097561, "tokens_rate.above_band": 0.9321192052980133, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06788079470198675 }, { "epoch": 1.3231382978723405, "grad_norm": 78.7339542445413, "learning_rate": 1.7066545389335782e-07, "loss": 0.4022, "step": 7960, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4431818181818182, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5750636132315522, "success_rate.epoch.env.math": 0.9682875264270613, "success_rate.epoch.env.sat": 0.0821917808219178, "success_rate.epoch.env.science": 0.9471507352941176, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6441133471805188, "success_rate.epoch.global": 0.8419345325862577, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9941056910569106, "tokens_p.mean_in_band": 0.2910070586622807, "tokens_rate.above_band": 0.7295373665480427, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2704626334519573 }, { "epoch": 1.323969414893617, "grad_norm": 32.7728521523835, "learning_rate": 1.7064189820631298e-07, "loss": 0.325, "step": 7965, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4431818181818182, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5761421319796954, "success_rate.epoch.env.math": 0.9683544303797469, "success_rate.epoch.env.sat": 0.08163265306122448, "success_rate.epoch.env.science": 0.9473202015574896, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.644182052744474, "success_rate.epoch.global": 0.8421052631578947, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933712121212122, "tokens_p.mean_in_band": 0.7447916666666666, "tokens_rate.above_band": 0.9041095890410958, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0958904109589041 }, { "epoch": 1.3248005319148937, "grad_norm": 39.58354034681417, "learning_rate": 1.706183591276062e-07, "loss": 0.3829, "step": 7970, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5746835443037974, "success_rate.epoch.env.math": 0.9683544303797469, "success_rate.epoch.env.sat": 0.08163265306122448, "success_rate.epoch.env.science": 0.9474885844748858, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6446335235905531, "success_rate.epoch.global": 0.8422287390029326, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992529880478087, "tokens_p.mean_in_band": 0.670703125, "tokens_rate.above_band": 0.9741267787839586, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0258732212160414 }, { "epoch": 1.3256316489361701, "grad_norm": 147.88389160321034, "learning_rate": 1.7059483669901358e-07, "loss": 0.2915, "step": 7975, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5757575757575758, "success_rate.epoch.env.math": 0.9683544303797469, "success_rate.epoch.env.sat": 0.08163265306122448, "success_rate.epoch.env.science": 0.947632058287796, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6447442058875249, "success_rate.epoch.global": 0.842551946151595, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9907738095238096, "tokens_p.mean_in_band": 0.828515625, "tokens_rate.above_band": 0.9692307692307692, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03076923076923077 }, { "epoch": 1.3264627659574468, "grad_norm": 25.526319341458443, "learning_rate": 1.7057133096228166e-07, "loss": 0.2057, "step": 7980, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5757575757575758, "success_rate.epoch.env.math": 0.9684873949579832, "success_rate.epoch.env.sat": 0.08163265306122448, "success_rate.epoch.env.science": 0.9477272727272728, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6447649494345897, "success_rate.epoch.global": 0.8428279287174992, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944267515923567, "tokens_p.mean_in_band": 0.5390888342696629, "tokens_rate.above_band": 0.8858681713259811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11413182867401898 }, { "epoch": 1.3272938829787235, "grad_norm": 126.87769291316708, "learning_rate": 1.7054784195912733e-07, "loss": 0.1923, "step": 7985, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5743073047858942, "success_rate.epoch.env.math": 0.9684873949579832, "success_rate.epoch.env.sat": 0.08783783783783784, "success_rate.epoch.env.science": 0.9478221415607986, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6452058387653585, "success_rate.epoch.global": 0.8428113152522602, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9984817813765182, "tokens_p.mean_below_band": 1.6079866327345371e-09, "tokens_p.mean_in_band": 0.565625, "tokens_rate.above_band": 0.9755134281200631, "tokens_rate.below_band": 0.0007898894154818325, "tokens_rate.in_band": 0.023696682464454975 }, { "epoch": 1.328125, "grad_norm": 243.32881266861213, "learning_rate": 1.7052436973123787e-07, "loss": 0.2514, "step": 7990, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5753768844221105, "success_rate.epoch.env.math": 0.9684873949579832, "success_rate.epoch.env.sat": 0.08783783783783784, "success_rate.epoch.env.science": 0.9479402444545043, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6453138099044424, "success_rate.epoch.global": 0.8430858806404657, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951265389876881, "tokens_p.mean_in_band": 0.5965401785714286, "tokens_rate.above_band": 0.9812080536912752, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01879194630872483 }, { "epoch": 1.3289561170212765, "grad_norm": 125.78788675640281, "learning_rate": 1.705009143202707e-07, "loss": 0.3716, "step": 7995, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5753768844221105, "success_rate.epoch.env.math": 0.9684873949579832, "success_rate.epoch.env.sat": 0.08783783783783784, "success_rate.epoch.env.science": 0.9481514878268711, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6453330138473848, "success_rate.epoch.global": 0.8434959349593496, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963450292397661, "tokens_p.mean_in_band": 0.6463913690476191, "tokens_rate.above_band": 0.9702127659574468, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029787234042553193 }, { "epoch": 1.3297872340425532, "grad_norm": 49.26032274536303, "learning_rate": 1.7047747576785347e-07, "loss": 0.1993, "step": 8000, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49382716049382713, "success_rate.epoch.env.logic": 0.5753768844221105, "success_rate.epoch.env.math": 0.9686192468619247, "success_rate.epoch.env.sat": 0.08783783783783784, "success_rate.epoch.env.science": 0.9481981981981982, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.64592444319003, "success_rate.epoch.global": 0.8437228182081763, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9932795698924731, "tokens_p.mean_in_band": 0.558203125, "tokens_rate.above_band": 0.9903194578896418, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00968054211035818 }, { "epoch": 1.3306183510638299, "grad_norm": 110.32576412481902, "learning_rate": 1.7045405411558382e-07, "loss": 0.2353, "step": 8005, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49382716049382713, "success_rate.epoch.env.logic": 0.5753768844221105, "success_rate.epoch.env.math": 0.9686847599164927, "success_rate.epoch.env.sat": 0.087248322147651, "success_rate.epoch.env.science": 0.9483610237988325, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6458916089141222, "success_rate.epoch.global": 0.8438403701561596, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9926697530864198, "tokens_p.mean_in_band": 0.6223958333333334, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 1.3314494680851063, "grad_norm": 184.91759656046762, "learning_rate": 1.7043064940502947e-07, "loss": 0.3444, "step": 8010, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49382716049382713, "success_rate.epoch.env.logic": 0.5764411027568922, "success_rate.epoch.env.math": 0.9686847599164927, "success_rate.epoch.env.sat": 0.087248322147651, "success_rate.epoch.env.science": 0.948051948051948, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6459602582402947, "success_rate.epoch.global": 0.8438672438672439, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997774092178771, "tokens_p.mean_below_band": 3.1650415621697903e-10, "tokens_p.mean_in_band": 0.7172475961538461, "tokens_rate.above_band": 0.9808219178082191, "tokens_rate.below_band": 0.0013698630136986301, "tokens_rate.in_band": 0.01780821917808219 }, { "epoch": 1.332280585106383, "grad_norm": 57.22127420728385, "learning_rate": 1.7040726167772802e-07, "loss": 0.3172, "step": 8015, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.5775, "success_rate.epoch.env.math": 0.9686847599164927, "success_rate.epoch.env.sat": 0.08666666666666667, "success_rate.epoch.env.science": 0.9481911567664135, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6454688189704348, "success_rate.epoch.global": 0.8436960276338514, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951657458563536, "tokens_p.mean_below_band": 2.2470914018413168e-13, "tokens_p.mean_in_band": 0.6779513888888888, "tokens_rate.above_band": 0.9501312335958005, "tokens_rate.below_band": 0.0026246719160104987, "tokens_rate.in_band": 0.047244094488188976 }, { "epoch": 1.3331117021276595, "grad_norm": 83.76120591697925, "learning_rate": 1.7038389097518702e-07, "loss": 0.1957, "step": 8020, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.5775, "success_rate.epoch.env.math": 0.9688149688149689, "success_rate.epoch.env.sat": 0.08666666666666667, "success_rate.epoch.env.science": 0.9483526268922529, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6454953352453726, "success_rate.epoch.global": 0.8440999138673557, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9866935483870968, "tokens_p.mean_in_band": 0.8458806818181818, "tokens_rate.above_band": 0.9337349397590361, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06626506024096386 }, { "epoch": 1.3339428191489362, "grad_norm": 116.76877475560637, "learning_rate": 1.7036053733888368e-07, "loss": 0.377, "step": 8025, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.449438202247191, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.5781637717121588, "success_rate.epoch.env.math": 0.9688796680497925, "success_rate.epoch.env.sat": 0.08609271523178808, "success_rate.epoch.env.science": 0.948421520675856, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6455156455449823, "success_rate.epoch.global": 0.8438842738470352, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9946826660600546, "tokens_p.mean_in_band": 0.6218428938356164, "tokens_rate.above_band": 0.8338391502276176, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1661608497723824 }, { "epoch": 1.3347739361702127, "grad_norm": 80.86425458268842, "learning_rate": 1.7033720081026498e-07, "loss": 0.2547, "step": 8030, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.5792079207920792, "success_rate.epoch.env.math": 0.968944099378882, "success_rate.epoch.env.sat": 0.08552631578947369, "success_rate.epoch.env.science": 0.9485130936529073, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6461293825679016, "success_rate.epoch.global": 0.8439554158330952, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.997878590078329, "tokens_p.mean_in_band": 0.6351102941176471, "tokens_rate.above_band": 0.9575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0425 }, { "epoch": 1.3356050531914894, "grad_norm": 51.291575262262754, "learning_rate": 1.7031388143074758e-07, "loss": 0.4153, "step": 8035, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.5792079207920792, "success_rate.epoch.env.math": 0.968944099378882, "success_rate.epoch.env.sat": 0.08552631578947369, "success_rate.epoch.env.science": 0.947787610619469, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6456083291097614, "success_rate.epoch.global": 0.8434559452523525, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.3888888888888889, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9935581140350878, "tokens_p.mean_in_band": 0.63984375, "tokens_rate.above_band": 0.9785407725321889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02145922746781116 }, { "epoch": 1.336436170212766, "grad_norm": 120.25845189976242, "learning_rate": 1.7029057924171767e-07, "loss": 0.3245, "step": 8040, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.5792079207920792, "success_rate.epoch.env.math": 0.968944099378882, "success_rate.epoch.env.sat": 0.08496732026143791, "success_rate.epoch.env.science": 0.9479028697571744, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6455679894379132, "success_rate.epoch.global": 0.843438656419015, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9990646258503402, "tokens_p.mean_in_band": 0.6140802556818182, "tokens_rate.above_band": 0.9435173299101413, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05648267008985879 }, { "epoch": 1.3372672872340425, "grad_norm": 49.072460945603204, "learning_rate": 1.7026729428453087e-07, "loss": 0.2834, "step": 8045, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.5777777777777777, "success_rate.epoch.env.math": 0.9690721649484536, "success_rate.epoch.env.sat": 0.08496732026143791, "success_rate.epoch.env.science": 0.9479947113265755, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6454579679947015, "success_rate.epoch.global": 0.8434659090909091, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9908364661654135, "tokens_p.mean_in_band": 0.7002467105263158, "tokens_rate.above_band": 0.9333333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06666666666666667 }, { "epoch": 1.3380984042553192, "grad_norm": 39.61924425235563, "learning_rate": 1.702440266005123e-07, "loss": 0.3767, "step": 8050, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4878048780487805, "success_rate.epoch.env.logic": 0.5763546798029556, "success_rate.epoch.env.math": 0.9691991786447639, "success_rate.epoch.env.sat": 0.08496732026143791, "success_rate.epoch.env.science": 0.9480633802816901, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6453463847834835, "success_rate.epoch.global": 0.84344866704481, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961988304093568, "tokens_p.mean_in_band": 0.68359375, "tokens_rate.above_band": 0.981630309988519, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018369690011481057 }, { "epoch": 1.3389295212765957, "grad_norm": 69.17395267168801, "learning_rate": 1.7022077623095643e-07, "loss": 0.2954, "step": 8055, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4939759036144578, "success_rate.epoch.env.logic": 0.5763546798029556, "success_rate.epoch.env.math": 0.9693251533742331, "success_rate.epoch.env.sat": 0.08496732026143791, "success_rate.epoch.env.science": 0.9481318681318681, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6459250655239676, "success_rate.epoch.global": 0.8437146092865232, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938033681214421, "tokens_p.mean_in_band": 0.7822779605263158, "tokens_rate.above_band": 0.9652014652014652, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0347985347985348 }, { "epoch": 1.3397606382978724, "grad_norm": 51.05556502055433, "learning_rate": 1.7019754321712695e-07, "loss": 0.392, "step": 8060, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4939759036144578, "success_rate.epoch.env.logic": 0.5763546798029556, "success_rate.epoch.env.math": 0.9693877551020408, "success_rate.epoch.env.sat": 0.08441558441558442, "success_rate.epoch.env.science": 0.9478070175438597, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6458510669143261, "success_rate.epoch.global": 0.8434586041254591, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9905857740585774, "tokens_p.mean_below_band": 1.126900315284729e-07, "tokens_p.mean_in_band": 0.7066865808823529, "tokens_rate.above_band": 0.8722627737226277, "tokens_rate.below_band": 0.0036496350364963502, "tokens_rate.in_band": 0.12408759124087591 }, { "epoch": 1.3405917553191489, "grad_norm": 38.75034963762497, "learning_rate": 1.7017432760025677e-07, "loss": 0.3619, "step": 8065, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44565217391304346, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4939759036144578, "success_rate.epoch.env.logic": 0.5763546798029556, "success_rate.epoch.env.math": 0.9694501018329938, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.9478527607361963, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6453661756973474, "success_rate.epoch.global": 0.8431151241534989, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9965744570837642, "tokens_p.mean_in_band": 0.6471354166666666, "tokens_rate.above_band": 0.9757820383451059, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024217961654894045 }, { "epoch": 1.3414228723404256, "grad_norm": 130.59590338037611, "learning_rate": 1.7015112942154793e-07, "loss": 0.4773, "step": 8070, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44565217391304346, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5749385749385749, "success_rate.epoch.env.math": 0.967479674796748, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.9479440069991251, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.645614249401697, "success_rate.epoch.global": 0.8428611658687694, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9945293942992874, "tokens_p.mean_below_band": 2.1047890186309814e-07, "tokens_p.mean_in_band": 0.5278463084795322, "tokens_rate.above_band": 0.8911939034716342, "tokens_rate.below_band": 0.00021168501270110075, "tokens_rate.in_band": 0.1085944115156647 }, { "epoch": 1.3422539893617023, "grad_norm": 141.17121286279115, "learning_rate": 1.701279487221715e-07, "loss": 0.2639, "step": 8075, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44565217391304346, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5749385749385749, "success_rate.epoch.env.math": 0.967741935483871, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.9475982532751092, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6456066591256158, "success_rate.epoch.global": 0.8429334082607474, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9166666666666667, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9922445255474452, "tokens_p.mean_below_band": 4.6798959374427795e-08, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9856115107913669, "tokens_rate.below_band": 0.007194244604316547, "tokens_rate.in_band": 0.007194244604316547 }, { "epoch": 1.3430851063829787, "grad_norm": 96.49923872240176, "learning_rate": 1.7010478554326752e-07, "loss": 0.3327, "step": 8080, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5749385749385749, "success_rate.epoch.env.math": 0.967741935483871, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.9468641114982579, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6460818034470623, "success_rate.epoch.global": 0.8426808749298934, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9948949824970829, "tokens_p.mean_in_band": 0.6080078125, "tokens_rate.above_band": 0.9345692475463467, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06543075245365322 }, { "epoch": 1.3439162234042552, "grad_norm": 86.9066090915904, "learning_rate": 1.7008163992594499e-07, "loss": 0.3098, "step": 8085, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5745721271393643, "success_rate.epoch.env.math": 0.967741935483871, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.9469565217391305, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6460568909417589, "success_rate.epoch.global": 0.8427092079485027, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9970649171270718, "tokens_p.mean_below_band": 2.7830537874251604e-10, "tokens_p.mean_in_band": 0.6667668269230769, "tokens_rate.above_band": 0.9890710382513661, "tokens_rate.below_band": 0.00078064012490242, "tokens_rate.in_band": 0.01014832162373146 }, { "epoch": 1.344747340425532, "grad_norm": 106.02571468541893, "learning_rate": 1.7005851191128172e-07, "loss": 0.2359, "step": 8090, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5745721271393643, "success_rate.epoch.env.math": 0.9678068410462777, "success_rate.epoch.env.sat": 0.08387096774193549, "success_rate.epoch.env.science": 0.9470945359930616, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6460753381977896, "success_rate.epoch.global": 0.8430167597765363, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940476190476191, "tokens_p.mean_in_band": 0.7249540441176471, "tokens_rate.above_band": 0.9081081081081082, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0918918918918919 }, { "epoch": 1.3455784574468086, "grad_norm": 89.48001110103597, "learning_rate": 1.7003540154032425e-07, "loss": 0.2883, "step": 8095, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44680851063829785, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058823529411764, "success_rate.epoch.env.logic": 0.5745721271393643, "success_rate.epoch.env.math": 0.9678068410462777, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9471632741446514, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6461307076611219, "success_rate.epoch.global": 0.8427216954824317, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9901859504132231, "tokens_p.mean_in_band": 0.7865323153409091, "tokens_rate.above_band": 0.9428571428571428, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05714285714285714 }, { "epoch": 1.346409574468085, "grad_norm": 131.98335158202892, "learning_rate": 1.700123088540878e-07, "loss": 0.3112, "step": 8100, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44680851063829785, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058823529411764, "success_rate.epoch.env.logic": 0.5745721271393643, "success_rate.epoch.env.math": 0.9659318637274549, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9473002159827214, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.645972704435599, "success_rate.epoch.global": 0.8427935447968837, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99075, "tokens_p.mean_below_band": 3.4051481634378433e-09, "tokens_rate.above_band": 0.9920634920634921, "tokens_rate.below_band": 0.007936507936507936, "tokens_rate.in_band": 0.0 }, { "epoch": 1.3472406914893618, "grad_norm": 87.18761372198222, "learning_rate": 1.699892338935563e-07, "loss": 0.1673, "step": 8105, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44680851063829785, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058823529411764, "success_rate.epoch.env.logic": 0.5766423357664233, "success_rate.epoch.env.math": 0.9659318637274549, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9473911168607159, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6461691689360584, "success_rate.epoch.global": 0.8430555555555556, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9876143292682927, "tokens_p.mean_in_band": 0.7359227594339622, "tokens_rate.above_band": 0.8608923884514436, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13910761154855644 }, { "epoch": 1.3480718085106382, "grad_norm": 134.4144077412549, "learning_rate": 1.699661766996821e-07, "loss": 0.3375, "step": 8110, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44680851063829785, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058823529411764, "success_rate.epoch.env.logic": 0.5766423357664233, "success_rate.epoch.env.math": 0.9659318637274549, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9475494411006019, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6461835620487753, "success_rate.epoch.global": 0.8433601330745772, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0003379783693844, "tokens_p.mean_in_band": 0.6365740740740741, "tokens_rate.above_band": 0.9780309194467046, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021969080553295363 }, { "epoch": 1.348902925531915, "grad_norm": 223.12720514740883, "learning_rate": 1.6994313731338607e-07, "loss": 0.4122, "step": 8115, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44680851063829785, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058823529411764, "success_rate.epoch.env.logic": 0.5748792270531401, "success_rate.epoch.env.math": 0.966, "success_rate.epoch.env.sat": 0.08333333333333333, "success_rate.epoch.env.science": 0.9476619476619477, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.646039701514285, "success_rate.epoch.global": 0.8431969026548672, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.996791074249605, "tokens_p.mean_in_band": 0.4778645833333333, "tokens_rate.above_band": 0.965675057208238, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034324942791762014 }, { "epoch": 1.3497340425531914, "grad_norm": 44.49840690034461, "learning_rate": 1.6992011577555752e-07, "loss": 0.3307, "step": 8120, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44680851063829785, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5748792270531401, "success_rate.epoch.env.math": 0.9660678642714571, "success_rate.epoch.env.sat": 0.08280254777070063, "success_rate.epoch.env.science": 0.9477739726027398, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6454730424877796, "success_rate.epoch.global": 0.8429911699779249, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.985316265060241, "tokens_p.mean_in_band": 0.76171875, "tokens_rate.above_band": 0.8767605633802817, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12323943661971831 }, { "epoch": 1.350565159574468, "grad_norm": 47.988907313821656, "learning_rate": 1.6989711212705395e-07, "loss": 0.317, "step": 8125, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.44680851063829785, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5734939759036145, "success_rate.epoch.env.math": 0.9662698412698413, "success_rate.epoch.env.sat": 0.08227848101265822, "success_rate.epoch.env.science": 0.9478409576742197, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6453239192298065, "success_rate.epoch.global": 0.8427863436123348, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947558459422283, "tokens_p.mean_in_band": 0.6508413461538461, "tokens_rate.above_band": 0.9654714475431607, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034528552456839307 }, { "epoch": 1.3513962765957448, "grad_norm": 133.8318294727147, "learning_rate": 1.6987412640870132e-07, "loss": 0.3788, "step": 8130, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4421052631578947, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5721153846153846, "success_rate.epoch.env.math": 0.9664031620553359, "success_rate.epoch.env.sat": 0.08227848101265822, "success_rate.epoch.env.science": 0.9479522184300341, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6447932594818679, "success_rate.epoch.global": 0.842625652293326, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972993827160493, "tokens_p.mean_in_band": 0.3767903645833333, "tokens_rate.above_band": 0.9908256880733946, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009174311926605505 }, { "epoch": 1.3522273936170213, "grad_norm": 118.77883537195326, "learning_rate": 1.6985115866129357e-07, "loss": 0.1919, "step": 8135, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4421052631578947, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5721153846153846, "success_rate.epoch.env.math": 0.9664694280078896, "success_rate.epoch.env.sat": 0.08176100628930817, "success_rate.epoch.env.science": 0.948018747337026, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6447582885851584, "success_rate.epoch.global": 0.8425671969281404, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9917953667953668, "tokens_p.mean_in_band": 0.7801724137931034, "tokens_rate.above_band": 0.8993055555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10069444444444445 }, { "epoch": 1.3530585106382977, "grad_norm": 348.231295807114, "learning_rate": 1.698282089255928e-07, "loss": 0.2543, "step": 8140, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4421052631578947, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5721153846153846, "success_rate.epoch.env.math": 0.9666011787819253, "success_rate.epoch.env.sat": 0.08176100628930817, "success_rate.epoch.env.science": 0.9481292517006803, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6447803117794939, "success_rate.epoch.global": 0.8428688748973446, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9898200757575758, "tokens_p.mean_in_band": 0.75341796875, "tokens_rate.above_band": 0.9924812030075187, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007518796992481203 }, { "epoch": 1.3538896276595744, "grad_norm": 0.0, "learning_rate": 1.6980527724232927e-07, "loss": 0.2454, "step": 8145, "success_rate.epoch.env.abd": 0.5263157894736842, "success_rate.epoch.env.agentgym:alfworld": 0.4421052631578947, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5707434052757794, "success_rate.epoch.env.math": 0.9666666666666667, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.9473907509546033, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6445479482803803, "success_rate.epoch.global": 0.8421196394427752, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9978362183754993, "tokens_p.mean_in_band": 0.6311910377358491, "tokens_rate.above_band": 0.9340796019900498, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06592039800995025 }, { "epoch": 1.3547207446808511, "grad_norm": 10.303336966261215, "learning_rate": 1.6978236365220102e-07, "loss": 0.3853, "step": 8150, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4479166666666667, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5717703349282297, "success_rate.epoch.env.math": 0.9666666666666667, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.947011445527766, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6427427881222623, "success_rate.epoch.global": 0.8417894162575014, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.43333333333333335, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9938186813186813, "tokens_p.mean_below_band": 4.6293280320242047e-10, "tokens_p.mean_in_band": 0.6172930743243243, "tokens_rate.above_band": 0.9210526315789473, "tokens_rate.below_band": 0.004048582995951417, "tokens_rate.in_band": 0.07489878542510121 }, { "epoch": 1.3555518617021276, "grad_norm": 77.38340136478867, "learning_rate": 1.6975946819587407e-07, "loss": 0.3095, "step": 8155, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4479166666666667, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4942528735632184, "success_rate.epoch.env.logic": 0.5717703349282297, "success_rate.epoch.env.math": 0.9667318982387475, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.9470787468247248, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6422323705251948, "success_rate.epoch.global": 0.8417324979569599, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9848977695167286, "tokens_p.mean_in_band": 0.81234375, "tokens_rate.above_band": 0.9149659863945578, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08503401360544217 }, { "epoch": 1.3563829787234043, "grad_norm": 26.713054797983926, "learning_rate": 1.6973659091398227e-07, "loss": 0.2446, "step": 8160, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4479166666666667, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5717703349282297, "success_rate.epoch.env.math": 0.9667318982387475, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.9472351203039258, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.641735994484499, "success_rate.epoch.global": 0.8418048382712694, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961436909394108, "tokens_p.mean_below_band": 1.3597309589385986e-07, "tokens_p.mean_in_band": 0.5226629849137931, "tokens_rate.above_band": 0.9115784139853053, "tokens_rate.below_band": 0.0002533569799847986, "tokens_rate.in_band": 0.0881682290347099 }, { "epoch": 1.3572140957446808, "grad_norm": 44.70321260397381, "learning_rate": 1.6971373184712716e-07, "loss": 0.2363, "step": 8165, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.4536082474226804, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5727923627684964, "success_rate.epoch.env.math": 0.9668615984405458, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.9473240623683101, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6423661991083592, "success_rate.epoch.global": 0.8421480878763222, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956113801452785, "tokens_p.mean_in_band": 0.70068359375, "tokens_rate.above_band": 0.9627039627039627, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037296037296037296 }, { "epoch": 1.3580452127659575, "grad_norm": 64.2790938183128, "learning_rate": 1.6969089103587802e-07, "loss": 0.2348, "step": 8170, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.45918367346938777, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5727923627684964, "success_rate.epoch.env.math": 0.9668615984405458, "success_rate.epoch.env.sat": 0.08125, "success_rate.epoch.env.science": 0.9470142977291842, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6428448955999575, "success_rate.epoch.global": 0.8421337665854319, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958920187793427, "tokens_p.mean_in_band": 0.6438802083333334, "tokens_rate.above_band": 0.9659863945578231, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034013605442176874 }, { "epoch": 1.358876329787234, "grad_norm": 55.08817867861605, "learning_rate": 1.6966806852077167e-07, "loss": 0.2433, "step": 8175, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.45918367346938777, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5738095238095238, "success_rate.epoch.env.math": 0.9669260700389105, "success_rate.epoch.env.sat": 0.08074534161490683, "success_rate.epoch.env.science": 0.947103274559194, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.642905436607622, "success_rate.epoch.global": 0.8421621621621621, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941479400749064, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.9434628975265018, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05653710247349823 }, { "epoch": 1.3597074468085106, "grad_norm": 71.34830114272714, "learning_rate": 1.6964526434231247e-07, "loss": 0.3126, "step": 8180, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.45918367346938777, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5758293838862559, "success_rate.epoch.env.math": 0.9669902912621359, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9467728415758592, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6430195475423459, "success_rate.epoch.global": 0.8419633225458468, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9924045138888888, "tokens_p.mean_in_band": 0.6516544117647058, "tokens_rate.above_band": 0.9442622950819672, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05573770491803279 }, { "epoch": 1.3605385638297873, "grad_norm": 208.77030032587822, "learning_rate": 1.6962247854097228e-07, "loss": 0.3752, "step": 8185, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5731132075471698, "success_rate.epoch.env.math": 0.9670542635658915, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9468619246861925, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6432831539288986, "success_rate.epoch.global": 0.8417653390742734, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976071975497703, "tokens_p.mean_in_band": 0.5317708333333333, "tokens_rate.above_band": 0.9666913397483345, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033308660251665435 }, { "epoch": 1.3613696808510638, "grad_norm": 27.360320610012824, "learning_rate": 1.695997111571903e-07, "loss": 0.3352, "step": 8190, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5727699530516432, "success_rate.epoch.env.math": 0.9671179883945842, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9469063545150501, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6432617812163554, "success_rate.epoch.global": 0.8417092179521634, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9971072931276297, "tokens_p.mean_in_band": 0.48214285714285715, "tokens_rate.above_band": 0.9807427785419532, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01925722145804677 }, { "epoch": 1.3622007978723405, "grad_norm": 0.0, "learning_rate": 1.6957696223137306e-07, "loss": 0.2413, "step": 8195, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5727699530516432, "success_rate.epoch.env.math": 0.9671814671814671, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9470391993327774, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6432796288167745, "success_rate.epoch.global": 0.8420064377682404, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9849806201550387, "tokens_p.mean_in_band": 0.8196614583333334, "tokens_rate.above_band": 0.9555555555555556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044444444444444446 }, { "epoch": 1.363031914893617, "grad_norm": 52.32346758828354, "learning_rate": 1.6955423180389433e-07, "loss": 0.3667, "step": 8200, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5710955710955711, "success_rate.epoch.env.math": 0.9672447013487476, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9471273938384679, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6431411785183107, "success_rate.epoch.global": 0.8418094218415417, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9966820987654321, "tokens_p.mean_in_band": 0.5223214285714286, "tokens_rate.above_band": 0.9774738535800482, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02252614641995173 }, { "epoch": 1.3638630319148937, "grad_norm": 61.841807043450736, "learning_rate": 1.6953151991509505e-07, "loss": 0.2597, "step": 8205, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5710955710955711, "success_rate.epoch.env.math": 0.9673076923076923, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9472591362126246, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6431588815485926, "success_rate.epoch.global": 0.8421052631578947, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967603668261563, "tokens_p.mean_in_band": 0.5926339285714286, "tokens_rate.above_band": 0.9889589905362776, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011041009463722398 }, { "epoch": 1.3646941489361701, "grad_norm": 43.16026661712374, "learning_rate": 1.6950882660528325e-07, "loss": 0.4053, "step": 8210, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48863636363636365, "success_rate.epoch.env.logic": 0.5720930232558139, "success_rate.epoch.env.math": 0.9673076923076923, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9469759734879868, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6432238169518295, "success_rate.epoch.global": 0.8421333333333333, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.7777777777777778, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9933035714285714, "tokens_p.mean_below_band": 7.566995918750763e-10, "tokens_p.mean_in_band": 0.584375, "tokens_rate.above_band": 0.9680851063829787, "tokens_rate.below_band": 0.005319148936170213, "tokens_rate.in_band": 0.026595744680851064 }, { "epoch": 1.3655252659574468, "grad_norm": 195.18112815856313, "learning_rate": 1.6948615191473402e-07, "loss": 0.2825, "step": 8215, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48314606741573035, "success_rate.epoch.env.logic": 0.5720930232558139, "success_rate.epoch.env.math": 0.9674329501915708, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9470417873396773, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.642742069271369, "success_rate.epoch.global": 0.8421192758253461, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9914638269100744, "tokens_p.mean_below_band": 6.845220923423767e-08, "tokens_p.mean_in_band": 0.6799232219827587, "tokens_rate.above_band": 0.8639018691588785, "tokens_rate.below_band": 0.0005841121495327102, "tokens_rate.in_band": 0.13551401869158877 }, { "epoch": 1.3663563829787235, "grad_norm": 61.03500435577016, "learning_rate": 1.6946349588368937e-07, "loss": 0.2848, "step": 8220, "success_rate.epoch.env.abd": 0.5, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48314606741573035, "success_rate.epoch.env.logic": 0.5707656612529002, "success_rate.epoch.env.math": 0.9674329501915708, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9471947194719472, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6426353029194923, "success_rate.epoch.global": 0.8421891604675876, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0004284490145672, "tokens_p.mean_in_band": 0.4954769736842105, "tokens_rate.above_band": 0.9684647302904564, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03153526970954357 }, { "epoch": 1.3671875, "grad_norm": 101.80055787478545, "learning_rate": 1.6944085855235816e-07, "loss": 0.2511, "step": 8225, "success_rate.epoch.env.abd": 0.5238095238095238, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48314606741573035, "success_rate.epoch.env.logic": 0.5704387990762124, "success_rate.epoch.env.math": 0.9674952198852772, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9472817133443163, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.644783659755757, "success_rate.epoch.global": 0.8422587486744433, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992133867276888, "tokens_p.mean_in_band": 0.56640625, "tokens_rate.above_band": 0.9820224719101124, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017977528089887642 }, { "epoch": 1.3680186170212765, "grad_norm": 98.99828634082701, "learning_rate": 1.694182399609162e-07, "loss": 0.3233, "step": 8230, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48314606741573035, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9674952198852772, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.947000821692687, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.64681583269628, "success_rate.epoch.global": 0.8423280423280424, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9917346014492754, "tokens_p.mean_in_band": 0.62890625, "tokens_rate.above_band": 0.9616724738675958, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03832752613240418 }, { "epoch": 1.3688497340425532, "grad_norm": 83.78584689142346, "learning_rate": 1.6939564014950588e-07, "loss": 0.2912, "step": 8235, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48314606741573035, "success_rate.epoch.env.logic": 0.5711009174311926, "success_rate.epoch.env.math": 0.9676190476190476, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9470877768662839, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6468052080517332, "success_rate.epoch.global": 0.8423970432946146, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9884969325153374, "tokens_p.mean_in_band": 0.7466145833333333, "tokens_rate.above_band": 0.9560117302052786, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04398826979472141 }, { "epoch": 1.3696808510638299, "grad_norm": 124.54686648700506, "learning_rate": 1.6937305915823634e-07, "loss": 0.2057, "step": 8240, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5711009174311926, "success_rate.epoch.env.math": 0.967741935483871, "success_rate.epoch.env.sat": 0.08024691358024691, "success_rate.epoch.env.science": 0.9471744471744472, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6473463334741102, "success_rate.epoch.global": 0.8426877470355731, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951923076923077, "tokens_p.mean_in_band": 0.52734375, "tokens_rate.above_band": 0.9933554817275747, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006644518272425249 }, { "epoch": 1.3705119680851063, "grad_norm": 43.62444350218963, "learning_rate": 1.693504970271833e-07, "loss": 0.2965, "step": 8245, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.46464646464646464, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5707762557077626, "success_rate.epoch.env.math": 0.967741935483871, "success_rate.epoch.env.sat": 0.07926829268292683, "success_rate.epoch.env.science": 0.947260834014718, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6472357065849756, "success_rate.epoch.global": 0.8422298185642914, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9977490996398559, "tokens_p.mean_in_band": 0.6370849609375, "tokens_rate.above_band": 0.9286510590858417, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0713489409141583 }, { "epoch": 1.371343085106383, "grad_norm": 51.00846567410458, "learning_rate": 1.6932795379638901e-07, "loss": 0.5359, "step": 8250, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.46, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5555555555555556, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5694760820045558, "success_rate.epoch.env.math": 0.967741935483871, "success_rate.epoch.env.sat": 0.07878787878787878, "success_rate.epoch.env.science": 0.9473684210526315, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6466612097479932, "success_rate.epoch.global": 0.8417738126475991, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9987186788154897, "tokens_p.mean_in_band": 0.5801205842391305, "tokens_rate.above_band": 0.9502164502164502, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049783549783549784 }, { "epoch": 1.3721742021276595, "grad_norm": 80.15349101983222, "learning_rate": 1.6930542950586225e-07, "loss": 0.2597, "step": 8255, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.46, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5704545454545454, "success_rate.epoch.env.math": 0.9678030303030303, "success_rate.epoch.env.sat": 0.07878787878787878, "success_rate.epoch.env.science": 0.9474969474969475, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6417168942168942, "success_rate.epoch.global": 0.8418848167539267, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983513189448441, "tokens_p.mean_in_band": 0.6172902960526315, "tokens_rate.above_band": 0.9705197827773467, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02948021722265322 }, { "epoch": 1.3730053191489362, "grad_norm": 70.0812423905079, "learning_rate": 1.6928292419557804e-07, "loss": 0.2137, "step": 8260, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.45544554455445546, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5704545454545454, "success_rate.epoch.env.math": 0.9678638941398866, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9476248477466505, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6418245105124147, "success_rate.epoch.global": 0.8419952990336903, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951121794871794, "tokens_p.mean_in_band": 0.7487980769230769, "tokens_rate.above_band": 0.967741935483871, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03225806451612903 }, { "epoch": 1.3738364361702127, "grad_norm": 157.4674575578166, "learning_rate": 1.6926043790547782e-07, "loss": 0.263, "step": 8265, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.45544554455445546, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5704545454545454, "success_rate.epoch.env.math": 0.9679245283018868, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9477309562398704, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6418396689356165, "success_rate.epoch.global": 0.8422425032594524, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986861861861862, "tokens_p.mean_in_band": 0.75390625, "tokens_rate.above_band": 0.9940298507462687, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005970149253731343 }, { "epoch": 1.3746675531914894, "grad_norm": 127.5671503534398, "learning_rate": 1.6923797067546922e-07, "loss": 0.3876, "step": 8270, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.45544554455445546, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9680451127819549, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9478155339805825, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6419468678624173, "success_rate.epoch.global": 0.8425299323269131, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972345132743363, "tokens_p.mean_in_band": 0.5993303571428571, "tokens_rate.above_band": 0.9797687861271677, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02023121387283237 }, { "epoch": 1.375498670212766, "grad_norm": 108.84975969558603, "learning_rate": 1.6921552254542613e-07, "loss": 0.1988, "step": 8275, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.45544554455445546, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9680451127819549, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9479628882613957, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6419602637061275, "success_rate.epoch.global": 0.8428163159262146, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9851371951219512, "tokens_p.mean_in_band": 0.8268229166666666, "tokens_rate.above_band": 0.9647058823529412, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03529411764705882 }, { "epoch": 1.3763297872340425, "grad_norm": 159.07076061506493, "learning_rate": 1.6919309355518843e-07, "loss": 0.2876, "step": 8280, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.46078431372549017, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9680451127819549, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9481094127111827, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6424589267625659, "success_rate.epoch.global": 0.8431423386051335, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957256838905775, "tokens_p.mean_in_band": 0.63671875, "tokens_rate.above_band": 0.9909638554216867, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009036144578313253 }, { "epoch": 1.3771609042553192, "grad_norm": 48.778187610501774, "learning_rate": 1.691706837445621e-07, "loss": 0.3073, "step": 8285, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.4563106796116505, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5723981900452488, "success_rate.epoch.env.math": 0.9680451127819549, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9482135688478522, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6421498486388846, "success_rate.epoch.global": 0.843167701863354, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956395348837209, "tokens_p.mean_in_band": 0.746484375, "tokens_rate.above_band": 0.9748110831234257, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02518891687657431 }, { "epoch": 1.3779920212765957, "grad_norm": 69.05478907883565, "learning_rate": 1.6914829315331906e-07, "loss": 0.1656, "step": 8290, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.4563106796116505, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5733634311512416, "success_rate.epoch.env.math": 0.9682242990654205, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9483173076923077, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6422633182965132, "success_rate.epoch.global": 0.8435321456235476, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917127071823204, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9945054945054945, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005494505494505495 }, { "epoch": 1.3788231382978724, "grad_norm": 74.68886340671088, "learning_rate": 1.6912592182119713e-07, "loss": 0.333, "step": 8295, "success_rate.epoch.env.abd": 0.5652173913043478, "success_rate.epoch.env.agentgym:alfworld": 0.4563106796116505, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5733634311512416, "success_rate.epoch.env.math": 0.9682835820895522, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9483793517406963, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6440709703803607, "success_rate.epoch.global": 0.8437338834450748, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986064659977704, "tokens_p.mean_in_band": 0.6478074596774194, "tokens_rate.above_band": 0.9665948275862069, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0334051724137931 }, { "epoch": 1.3796542553191489, "grad_norm": 120.32190112944522, "learning_rate": 1.6910356978789994e-07, "loss": 0.3921, "step": 8300, "success_rate.epoch.env.abd": 0.5652173913043478, "success_rate.epoch.env.agentgym:alfworld": 0.4563106796116505, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5752808988764045, "success_rate.epoch.env.math": 0.9683426443202979, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9484412470023981, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6442562817637798, "success_rate.epoch.global": 0.8439752832131823, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960416666666667, "tokens_p.mean_in_band": 0.6607572115384616, "tokens_rate.above_band": 0.9877704609595485, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012229539040451553 }, { "epoch": 1.3804853723404256, "grad_norm": 65.6684037208593, "learning_rate": 1.6908123709309683e-07, "loss": 0.372, "step": 8305, "success_rate.epoch.env.abd": 0.5652173913043478, "success_rate.epoch.env.agentgym:alfworld": 0.4563106796116505, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5762331838565022, "success_rate.epoch.env.math": 0.9685185185185186, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9477045908183632, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6442918729450784, "success_rate.epoch.global": 0.8437419686455924, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9908494475138122, "tokens_p.mean_below_band": 5.115907697472721e-12, "tokens_p.mean_in_band": 0.4409950657894737, "tokens_rate.above_band": 0.900497512437811, "tokens_rate.below_band": 0.004975124378109453, "tokens_rate.in_band": 0.0945273631840796 }, { "epoch": 1.3813164893617023, "grad_norm": 114.16992487162783, "learning_rate": 1.6905892377642295e-07, "loss": 0.4041, "step": 8310, "success_rate.epoch.env.abd": 0.5652173913043478, "success_rate.epoch.env.agentgym:alfworld": 0.4519230769230769, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5762331838565022, "success_rate.epoch.env.math": 0.9686346863468634, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9477671451355661, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.643909247441167, "success_rate.epoch.global": 0.8437259430331023, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9981730769230769, "tokens_p.mean_in_band": 0.6603618421052632, "tokens_rate.above_band": 0.9715994020926756, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028400597907324365 }, { "epoch": 1.3821476063829787, "grad_norm": 153.63807539239286, "learning_rate": 1.6903662987747892e-07, "loss": 0.174, "step": 8315, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44761904761904764, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5762331838565022, "success_rate.epoch.env.math": 0.96875, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9478503184713376, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6451829201424273, "success_rate.epoch.global": 0.8437900128040973, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9918382352941176, "tokens_p.mean_in_band": 0.8151041666666666, "tokens_rate.above_band": 0.9860788863109049, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013921113689095127 }, { "epoch": 1.3829787234042552, "grad_norm": 42.72788035331363, "learning_rate": 1.6901435543583093e-07, "loss": 0.1334, "step": 8320, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44761904761904764, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4888888888888889, "success_rate.epoch.env.logic": 0.5746102449888641, "success_rate.epoch.env.math": 0.9688644688644689, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9479332273449921, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6409210922711491, "success_rate.epoch.global": 0.8434227330779055, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975706713780919, "tokens_p.mean_in_band": 0.5389344262295082, "tokens_rate.above_band": 0.9586720867208672, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041327913279132794 }, { "epoch": 1.383809840425532, "grad_norm": 100.76000031026607, "learning_rate": 1.6899210049101068e-07, "loss": 0.4322, "step": 8325, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44761904761904764, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4835164835164835, "success_rate.epoch.env.logic": 0.5746102449888641, "success_rate.epoch.env.math": 0.968978102189781, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9480570975416336, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6404542830120171, "success_rate.epoch.global": 0.8435270132517839, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9869981751824818, "tokens_p.mean_in_band": 0.20414624183006536, "tokens_rate.above_band": 0.4724137931034483, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.5275862068965518 }, { "epoch": 1.3846409574468086, "grad_norm": 50.39568366541492, "learning_rate": 1.6896986508251527e-07, "loss": 0.3325, "step": 8330, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4835164835164835, "success_rate.epoch.env.logic": 0.5746102449888641, "success_rate.epoch.env.math": 0.968978102189781, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9482008699090549, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6400834603905142, "success_rate.epoch.global": 0.8435910478128179, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.71875, "tokens_rate.above_band": 0.9924812030075187, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007518796992481203 }, { "epoch": 1.385472074468085, "grad_norm": 77.04914310728238, "learning_rate": 1.689476492498071e-07, "loss": 0.4162, "step": 8335, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4782608695652174, "success_rate.epoch.env.logic": 0.5733333333333334, "success_rate.epoch.env.math": 0.968978102189781, "success_rate.epoch.env.sat": 0.08433734939759036, "success_rate.epoch.env.science": 0.9482826687722069, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6394970306865465, "success_rate.epoch.global": 0.8433214829862875, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971677623855365, "tokens_p.mean_in_band": 0.5416254940711462, "tokens_rate.above_band": 0.943927304964539, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056072695035460994 }, { "epoch": 1.3863031914893618, "grad_norm": 97.20778958939513, "learning_rate": 1.6892545303231377e-07, "loss": 0.2281, "step": 8340, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4782608695652174, "success_rate.epoch.env.logic": 0.5752212389380531, "success_rate.epoch.env.math": 0.9690346083788707, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.9483845547675335, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6401815132022721, "success_rate.epoch.global": 0.8436787433493793, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9935405027932961, "tokens_p.mean_in_band": 0.8046875, "tokens_rate.above_band": 0.9808219178082191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019178082191780823 }, { "epoch": 1.3871343085106382, "grad_norm": 61.70549035738112, "learning_rate": 1.6890327646942816e-07, "loss": 0.3042, "step": 8345, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4782608695652174, "success_rate.epoch.env.logic": 0.5748898678414097, "success_rate.epoch.env.math": 0.9690346083788707, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.9485062893081762, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6401624553335448, "success_rate.epoch.global": 0.8437420986093552, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951923076923077, "tokens_p.mean_in_band": 0.5275493421052632, "tokens_rate.above_band": 0.9766009852216748, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023399014778325122 }, { "epoch": 1.387965425531915, "grad_norm": 100.48262737084313, "learning_rate": 1.6888111960050827e-07, "loss": 0.3931, "step": 8350, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4782608695652174, "success_rate.epoch.env.logic": 0.5758241758241758, "success_rate.epoch.env.math": 0.9690909090909091, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.9485871271585558, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6402598595649253, "success_rate.epoch.global": 0.8439787932340318, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9903381642512077, "tokens_p.mean_in_band": 0.708984375, "tokens_rate.above_band": 0.9627906976744186, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.037209302325581395 }, { "epoch": 1.3887965425531914, "grad_norm": 199.91981455687412, "learning_rate": 1.6885898246487707e-07, "loss": 0.3211, "step": 8355, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4782608695652174, "success_rate.epoch.env.logic": 0.5767543859649122, "success_rate.epoch.env.math": 0.969147005444646, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.9487079091620987, "success_rate.epoch.env.webshop": 1.0, "success_rate.epoch.env_macro_mean": 0.6403605039738358, "success_rate.epoch.global": 0.8442932728647015, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9914647577092511, "tokens_p.mean_in_band": 0.8815104166666666, "tokens_rate.above_band": 0.9869565217391304, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013043478260869565 }, { "epoch": 1.389627659574468, "grad_norm": 298.52706127773706, "learning_rate": 1.6883686510182255e-07, "loss": 0.2553, "step": 8360, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4782608695652174, "success_rate.epoch.env.logic": 0.5776805251641138, "success_rate.epoch.env.math": 0.9692028985507246, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.9487881157154027, "success_rate.epoch.env.webshop": 0.6666666666666666, "success_rate.epoch.env_macro_mean": 0.6101540408397678, "success_rate.epoch.global": 0.8443158953722334, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9892761394101877, "tokens_p.mean_in_band": 0.4809229651162791, "tokens_rate.above_band": 0.6844036697247706, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.3155963302752294 }, { "epoch": 1.3904587765957448, "grad_norm": 48.18066271543616, "learning_rate": 1.6881476755059767e-07, "loss": 0.3036, "step": 8365, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.5764192139737991, "success_rate.epoch.env.math": 0.9692028985507246, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.9488481062085123, "success_rate.epoch.env.webshop": 0.6666666666666666, "success_rate.epoch.env_macro_mean": 0.6100868784754856, "success_rate.epoch.global": 0.8440482169763938, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952848344880677, "tokens_p.mean_in_band": 0.7495349702380952, "tokens_rate.above_band": 0.9840909090909091, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015909090909090907 }, { "epoch": 1.3912898936170213, "grad_norm": 57.08815032106201, "learning_rate": 1.687926898504201e-07, "loss": 0.2079, "step": 8370, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.5760869565217391, "success_rate.epoch.env.math": 0.9692028985507246, "success_rate.epoch.env.sat": 0.08982035928143713, "success_rate.epoch.env.science": 0.9489676665368134, "success_rate.epoch.env.webshop": 0.6666666666666666, "success_rate.epoch.env_macro_mean": 0.6100675423733257, "success_rate.epoch.global": 0.844110275689223, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951862373737373, "tokens_p.mean_in_band": 0.6171875, "tokens_rate.above_band": 0.9826302729528535, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017369727047146403 }, { "epoch": 1.3921210106382977, "grad_norm": 77.09650797301903, "learning_rate": 1.6877063204047238e-07, "loss": 0.3386, "step": 8375, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.5760869565217391, "success_rate.epoch.env.math": 0.9692028985507246, "success_rate.epoch.env.sat": 0.08928571428571429, "success_rate.epoch.env.science": 0.9490668740279938, "success_rate.epoch.env.webshop": 0.6666666666666666, "success_rate.epoch.env_macro_mean": 0.6100279571456401, "success_rate.epoch.global": 0.8440940940940941, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9902895480225988, "tokens_p.mean_in_band": 0.6481370192307693, "tokens_rate.above_band": 0.8719211822660099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12807881773399016 }, { "epoch": 1.3929521276595744, "grad_norm": 110.09370402287995, "learning_rate": 1.6874859415990175e-07, "loss": 0.5582, "step": 8380, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.5770065075921909, "success_rate.epoch.env.math": 0.969258589511754, "success_rate.epoch.env.sat": 0.08875739644970414, "success_rate.epoch.env.science": 0.9491656965463717, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5949260553317475, "success_rate.epoch.global": 0.8439450686641697, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950358072916666, "tokens_p.mean_in_band": 0.646484375, "tokens_rate.above_band": 0.8135593220338984, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1864406779661017 }, { "epoch": 1.3937832446808511, "grad_norm": 154.69772930804822, "learning_rate": 1.6872657624781997e-07, "loss": 0.4721, "step": 8385, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.5770065075921909, "success_rate.epoch.env.math": 0.969258589511754, "success_rate.epoch.env.sat": 0.08823529411764706, "success_rate.epoch.env.science": 0.9492641363284275, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5948875405544747, "success_rate.epoch.global": 0.843929194714535, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989436619718309, "tokens_p.mean_in_band": 0.6216856060606061, "tokens_rate.above_band": 0.955585464333782, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04441453566621804 }, { "epoch": 1.3946143617021276, "grad_norm": 91.78758005607338, "learning_rate": 1.6870457834330354e-07, "loss": 0.323, "step": 8390, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.577922077922078, "success_rate.epoch.env.math": 0.969258589511754, "success_rate.epoch.env.sat": 0.08823529411764706, "success_rate.epoch.env.science": 0.9493817619783617, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5949814674617312, "success_rate.epoch.global": 0.8442010950721752, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9880998452012384, "tokens_p.mean_in_band": 0.7905649038461539, "tokens_rate.above_band": 0.9613095238095238, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03869047619047619 }, { "epoch": 1.3954454787234043, "grad_norm": 51.38050078813915, "learning_rate": 1.6868260048539327e-07, "loss": 0.3091, "step": 8395, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.577922077922078, "success_rate.epoch.env.math": 0.969258589511754, "success_rate.epoch.env.sat": 0.08823529411764706, "success_rate.epoch.env.science": 0.9494988434849653, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5949921112350588, "success_rate.epoch.global": 0.8444333996023857, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9886924342105263, "tokens_p.mean_in_band": 0.7584635416666666, "tokens_rate.above_band": 0.9620253164556962, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0379746835443038 }, { "epoch": 1.3962765957446808, "grad_norm": 131.16956759583073, "learning_rate": 1.686606427130945e-07, "loss": 0.2383, "step": 8400, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4787234042553192, "success_rate.epoch.env.logic": 0.5788336933045356, "success_rate.epoch.env.math": 0.9693693693693693, "success_rate.epoch.env.sat": 0.0872093023255814, "success_rate.epoch.env.science": 0.9495571813631113, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5949970877192544, "success_rate.epoch.global": 0.8442460317460317, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9978210034013606, "tokens_p.mean_in_band": 0.6898311491935484, "tokens_rate.above_band": 0.9499192245557351, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050080775444264945 }, { "epoch": 1.3971077127659575, "grad_norm": 20.40932897875633, "learning_rate": 1.686387050653769e-07, "loss": 0.2054, "step": 8405, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47368421052631576, "success_rate.epoch.env.logic": 0.575107296137339, "success_rate.epoch.env.math": 0.9694244604316546, "success_rate.epoch.env.sat": 0.0872093023255814, "success_rate.epoch.env.science": 0.9496153846153846, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5942105153027417, "success_rate.epoch.global": 0.8435643564356435, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9946627850557982, "tokens_p.mean_in_band": 0.6571691176470589, "tokens_rate.above_band": 0.9758522727272727, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.024147727272727272 }, { "epoch": 1.397938829787234, "grad_norm": 65.5731791262289, "learning_rate": 1.686167875811744e-07, "loss": 0.3979, "step": 8410, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.575107296137339, "success_rate.epoch.env.math": 0.9694793536804309, "success_rate.epoch.env.sat": 0.0872093023255814, "success_rate.epoch.env.science": 0.9497120921305182, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5937757325970684, "success_rate.epoch.global": 0.843587842846553, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9869473189087489, "tokens_p.mean_in_band": 0.5395791997354498, "tokens_rate.above_band": 0.7376821651630812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2623178348369188 }, { "epoch": 1.3987699468085106, "grad_norm": 46.34687494719824, "learning_rate": 1.685948902993852e-07, "loss": 0.3938, "step": 8415, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.5738758029978587, "success_rate.epoch.env.math": 0.9695340501792115, "success_rate.epoch.env.sat": 0.08670520231213873, "success_rate.epoch.env.science": 0.9497506712696586, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5936264310047954, "success_rate.epoch.global": 0.8432872655478776, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9919225481209899, "tokens_p.mean_in_band": 0.760670731707317, "tokens_rate.above_band": 0.9637809187279152, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036219081272084806 }, { "epoch": 1.3996010638297873, "grad_norm": 41.00890655977667, "learning_rate": 1.685730132588716e-07, "loss": 0.2116, "step": 8420, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.5765957446808511, "success_rate.epoch.env.math": 0.9695885509838998, "success_rate.epoch.env.sat": 0.08670520231213873, "success_rate.epoch.env.science": 0.9498660543436663, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5938891424194944, "success_rate.epoch.global": 0.8436730674544559, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959054388133498, "tokens_p.mean_in_band": 0.7634548611111112, "tokens_rate.above_band": 0.988997555012225, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011002444987775062 }, { "epoch": 1.4004321808510638, "grad_norm": 52.45351729152351, "learning_rate": 1.6855115649845993e-07, "loss": 0.2796, "step": 8425, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46875, "success_rate.epoch.env.logic": 0.5774946921443737, "success_rate.epoch.env.math": 0.9697508896797153, "success_rate.epoch.env.sat": 0.08620689655172414, "success_rate.epoch.env.science": 0.9499235474006116, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5939455490973007, "success_rate.epoch.global": 0.8437346437346437, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9896864686468647, "tokens_p.mean_in_band": 0.6438802083333334, "tokens_rate.above_band": 0.9099099099099099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09009009009009009 }, { "epoch": 1.4012632978723405, "grad_norm": 57.910756304430734, "learning_rate": 1.6852932005694071e-07, "loss": 0.3465, "step": 8430, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4639175257731959, "success_rate.epoch.env.logic": 0.5774946921443737, "success_rate.epoch.env.math": 0.9698581560283688, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.95, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5934781521776461, "success_rate.epoch.global": 0.8435507601765572, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9904442148760331, "tokens_p.mean_below_band": 1.6079866327345371e-09, "tokens_p.mean_in_band": 0.6223021582733813, "tokens_rate.above_band": 0.8383371824480369, "tokens_rate.below_band": 0.0011547344110854503, "tokens_rate.in_band": 0.1605080831408776 }, { "epoch": 1.402094414893617, "grad_norm": 41.27371947077507, "learning_rate": 1.6850750397306815e-07, "loss": 0.2101, "step": 8435, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.44339622641509435, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4639175257731959, "success_rate.epoch.env.logic": 0.5774946921443737, "success_rate.epoch.env.math": 0.9699115044247788, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.950152207001522, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5934968390320035, "success_rate.epoch.global": 0.8438952777098117, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.991869918699187, "tokens_p.mean_in_band": 0.8763020833333334, "tokens_rate.above_band": 0.9761904761904762, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023809523809523808 }, { "epoch": 1.4029255319148937, "grad_norm": 74.19727213794278, "learning_rate": 1.6848570828556055e-07, "loss": 0.2465, "step": 8440, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4392523364485981, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4639175257731959, "success_rate.epoch.env.logic": 0.5792811839323467, "success_rate.epoch.env.math": 0.9700704225352113, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.9502090459901178, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5933021443884131, "success_rate.epoch.global": 0.843994140625, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975253222836096, "tokens_p.mean_in_band": 0.755859375, "tokens_rate.above_band": 0.9945054945054945, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005494505494505495 }, { "epoch": 1.4037566489361701, "grad_norm": 293.395395615618, "learning_rate": 1.6846393303309984e-07, "loss": 0.3409, "step": 8445, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4392523364485981, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46938775510204084, "success_rate.epoch.env.logic": 0.580168776371308, "success_rate.epoch.env.math": 0.9702276707530648, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.9502090459901178, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5938944234780185, "success_rate.epoch.global": 0.8441843452816387, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9935379644588045, "tokens_p.mean_in_band": 0.6997282608695652, "tokens_rate.above_band": 0.9641744548286605, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03582554517133956 }, { "epoch": 1.4045877659574468, "grad_norm": 33.23971138607381, "learning_rate": 1.6844217825433176e-07, "loss": 0.3046, "step": 8450, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4392523364485981, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.5789473684210527, "success_rate.epoch.env.math": 0.9703315881326352, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.9503034901365706, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5933703928525813, "success_rate.epoch.global": 0.8440389294403893, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942249568221071, "tokens_p.mean_in_band": 0.7004504504504504, "tokens_rate.above_band": 0.9125295508274232, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08747044917257683 }, { "epoch": 1.4054188829787235, "grad_norm": 101.36408097169931, "learning_rate": 1.6842044398786577e-07, "loss": 0.2966, "step": 8455, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4392523364485981, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.5798319327731093, "success_rate.epoch.env.math": 0.9703832752613241, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.9504538577912254, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5934691764103449, "success_rate.epoch.global": 0.8444174757281553, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9913091715976331, "tokens_p.mean_in_band": 0.746875, "tokens_rate.above_band": 0.9712643678160919, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028735632183908046 }, { "epoch": 1.40625, "grad_norm": 71.99968433828516, "learning_rate": 1.6839873027227485e-07, "loss": 0.3234, "step": 8460, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4392523364485981, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.5815899581589958, "success_rate.epoch.env.math": 0.9703832752613241, "success_rate.epoch.env.sat": 0.08571428571428572, "success_rate.epoch.env.science": 0.950566037735849, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.593639195076755, "success_rate.epoch.global": 0.844718992248062, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9908854166666666, "tokens_p.mean_in_band": 0.4971751412429379, "tokens_rate.above_band": 0.730593607305936, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2694063926940639 }, { "epoch": 1.4070811170212765, "grad_norm": 57.62338551044947, "learning_rate": 1.683770371460955e-07, "loss": 0.4822, "step": 8465, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4392523364485981, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.58125, "success_rate.epoch.env.math": 0.9703832752613241, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.950640542577242, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5935707890490627, "success_rate.epoch.global": 0.8444981862152358, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986001317523057, "tokens_p.mean_in_band": 0.6627155172413793, "tokens_rate.above_band": 0.9631979695431472, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03680203045685279 }, { "epoch": 1.4079122340425532, "grad_norm": 85.41164953714559, "learning_rate": 1.683553646478277e-07, "loss": 0.2966, "step": 8470, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4392523364485981, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.58125, "success_rate.epoch.env.math": 0.9705372616984402, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.9507148231753197, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5935915405977167, "success_rate.epoch.global": 0.8447609850313859, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958118556701031, "tokens_p.mean_in_band": 0.498046875, "tokens_rate.above_band": 0.9974293059125964, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002570694087403599 }, { "epoch": 1.4087433510638299, "grad_norm": 94.69942747018248, "learning_rate": 1.6833371281593487e-07, "loss": 0.2865, "step": 8475, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4392523364485981, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.58125, "success_rate.epoch.env.math": 0.9705882352941176, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.950844277673546, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5936079431517078, "success_rate.epoch.global": 0.8450602409638555, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994873395149786, "tokens_p.mean_in_band": 0.5691105769230769, "tokens_rate.above_band": 0.9642365887207703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03576341127922971 }, { "epoch": 1.4095744680851063, "grad_norm": 622.6886392493246, "learning_rate": 1.6831208168884368e-07, "loss": 0.235, "step": 8480, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4392523364485981, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.58125, "success_rate.epoch.env.math": 0.9706390328151986, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.950954698614751, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5936225993755522, "success_rate.epoch.global": 0.8453211450565311, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.989448051948052, "tokens_p.mean_in_band": 0.439453125, "tokens_rate.above_band": 0.9935483870967742, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0064516129032258064 }, { "epoch": 1.410405585106383, "grad_norm": 29.6230016642961, "learning_rate": 1.6829047130494406e-07, "loss": 0.2267, "step": 8485, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.58125, "success_rate.epoch.env.math": 0.9707401032702238, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.9510463378176383, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5941121282531665, "success_rate.epoch.global": 0.8456182472989195, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979729729729729, "tokens_p.mean_below_band": 1.7497114868092467e-13, "tokens_p.mean_in_band": 0.6658653846153846, "tokens_rate.above_band": 0.9850905218317358, "tokens_rate.below_band": 0.0010649627263045794, "tokens_rate.in_band": 0.013844515441959531 }, { "epoch": 1.4112367021276595, "grad_norm": 227.04022765890267, "learning_rate": 1.682688817025892e-07, "loss": 0.3032, "step": 8490, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.58125, "success_rate.epoch.env.math": 0.9707401032702238, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.9512104283054004, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5850361364793267, "success_rate.epoch.global": 0.8457485029940119, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9943110972568578, "tokens_p.mean_in_band": 0.634765625, "tokens_rate.above_band": 0.9950372208436724, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004962779156327543 }, { "epoch": 1.4120678191489362, "grad_norm": 64.9213150845803, "learning_rate": 1.6824731292009531e-07, "loss": 0.3299, "step": 8495, "success_rate.epoch.env.abd": 0.5833333333333334, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.58298755186722, "success_rate.epoch.env.math": 0.9707903780068728, "success_rate.epoch.env.sat": 0.08522727272727272, "success_rate.epoch.env.science": 0.9508928571428571, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5851697960649018, "success_rate.epoch.global": 0.845730686438651, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942434210526315, "tokens_p.mean_in_band": 0.7126736111111112, "tokens_rate.above_band": 0.9547738693467337, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04522613065326633 }, { "epoch": 1.4128989361702127, "grad_norm": 54.897691443447165, "learning_rate": 1.682257649957417e-07, "loss": 0.1865, "step": 8500, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.58298755186722, "success_rate.epoch.env.math": 0.9707903780068728, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.9509840326773116, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5871630733103416, "success_rate.epoch.global": 0.8459885386819485, "success_rate.window.env.abd": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9833409926470589, "tokens_p.mean_in_band": 0.8663194444444444, "tokens_rate.above_band": 0.9679715302491103, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03202846975088968 }, { "epoch": 1.4137300531914894, "grad_norm": 102.09243416471436, "learning_rate": 1.6820423796777067e-07, "loss": 0.3828, "step": 8505, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.5838509316770186, "success_rate.epoch.env.math": 0.9708904109589042, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.9510748702742773, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5872589142520502, "success_rate.epoch.global": 0.84628217349857, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9931006493506493, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9871794871794872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01282051282051282 }, { "epoch": 1.414561170212766, "grad_norm": 87.87693699337014, "learning_rate": 1.6818273187438742e-07, "loss": 0.2531, "step": 8510, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.5826446280991735, "success_rate.epoch.env.math": 0.9709401709401709, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.9511472982975574, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5871603582908413, "success_rate.epoch.global": 0.846263683960019, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9940318302387268, "tokens_p.mean_in_band": 0.6069444444444444, "tokens_rate.above_band": 0.9436795994993742, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056320400500625784 }, { "epoch": 1.4153922872340425, "grad_norm": 46.8815807390635, "learning_rate": 1.6816124675376e-07, "loss": 0.3642, "step": 8515, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.5823045267489712, "success_rate.epoch.env.math": 0.9710391822827938, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.9512195121951219, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5871450059172036, "success_rate.epoch.global": 0.8463182897862233, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9989406779661016, "tokens_p.mean_in_band": 0.74765625, "tokens_rate.above_band": 0.9860724233983287, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013927576601671309 }, { "epoch": 1.4162234042553192, "grad_norm": 59.774205281413664, "learning_rate": 1.681397826440192e-07, "loss": 0.3651, "step": 8520, "success_rate.epoch.env.abd": 0.6153846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46464646464646464, "success_rate.epoch.env.logic": 0.5823045267489712, "success_rate.epoch.env.math": 0.9710391822827938, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.9509587020648967, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5885198973039664, "success_rate.epoch.global": 0.8463362580033199, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9909147869674185, "tokens_p.mean_in_band": 0.52734375, "tokens_rate.above_band": 0.869281045751634, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13071895424836602 }, { "epoch": 1.4170545212765957, "grad_norm": 57.75438079059562, "learning_rate": 1.6811833958325864e-07, "loss": 0.3606, "step": 8525, "success_rate.epoch.env.abd": 0.6153846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46, "success_rate.epoch.env.logic": 0.5823045267489712, "success_rate.epoch.env.math": 0.9711375212224108, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.951048951048951, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5881146357837124, "success_rate.epoch.global": 0.8463905325443787, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9887704174228675, "tokens_p.mean_in_band": 0.6736111111111112, "tokens_rate.above_band": 0.859594383775351, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14040561622464898 }, { "epoch": 1.4178856382978724, "grad_norm": 54.146553604113556, "learning_rate": 1.6809691760953443e-07, "loss": 0.2784, "step": 8530, "success_rate.epoch.env.abd": 0.6153846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.5831622176591376, "success_rate.epoch.env.math": 0.9711375212224108, "success_rate.epoch.env.sat": 0.0903954802259887, "success_rate.epoch.env.science": 0.9507533994854833, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5877516979565446, "success_rate.epoch.global": 0.8461356653273457, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.989856429463171, "tokens_p.mean_below_band": 1.0277290130034089e-10, "tokens_p.mean_in_band": 0.6682091346153847, "tokens_rate.above_band": 0.8594420600858369, "tokens_rate.below_band": 0.001072961373390558, "tokens_rate.in_band": 0.13948497854077252 }, { "epoch": 1.4187167553191489, "grad_norm": 47.83464306540245, "learning_rate": 1.680755167608653e-07, "loss": 0.2885, "step": 8535, "success_rate.epoch.env.abd": 0.6153846153846154, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.5831622176591376, "success_rate.epoch.env.math": 0.9712351945854484, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.9508076358296622, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5877193406775142, "success_rate.epoch.global": 0.8461175359924474, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9926927860696517, "tokens_p.mean_in_band": 0.6770582932692307, "tokens_rate.above_band": 0.8854625550660793, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1145374449339207 }, { "epoch": 1.4195478723404256, "grad_norm": 248.32031368998727, "learning_rate": 1.6805413707523256e-07, "loss": 0.4229, "step": 8540, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.5831622176591376, "success_rate.epoch.env.math": 0.9712837837837838, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.9505131964809385, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5856249885918405, "success_rate.epoch.global": 0.8458637756304501, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9804817275747508, "tokens_p.mean_in_band": 0.6345419847328244, "tokens_rate.above_band": 0.6967592592592593, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.30324074074074076 }, { "epoch": 1.4203789893617023, "grad_norm": 28.831742212183467, "learning_rate": 1.6803277859057993e-07, "loss": 0.1909, "step": 8545, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.5804480651731161, "success_rate.epoch.env.math": 0.9713322091062394, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.9505675576711827, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5853875916851748, "success_rate.epoch.global": 0.8454481298517996, "success_rate.window.env.logic": 0.25, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9991362631288004, "tokens_p.mean_in_band": 0.6020107581967213, "tokens_rate.above_band": 0.967379679144385, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032620320855614976 }, { "epoch": 1.4212101063829787, "grad_norm": 113.12793535026168, "learning_rate": 1.6801144134481347e-07, "loss": 0.2344, "step": 8550, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.5804480651731161, "success_rate.epoch.env.math": 0.9713322091062394, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.9507119386637459, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5854007172299535, "success_rate.epoch.global": 0.8457384362526414, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951149425287357, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.9954233409610984, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004576659038901602 }, { "epoch": 1.4220412234042552, "grad_norm": 97.25920413395448, "learning_rate": 1.6799012537580156e-07, "loss": 0.2869, "step": 8555, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.5813008130081301, "success_rate.epoch.env.math": 0.9713322091062394, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.9507838133430551, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5854847738221646, "success_rate.epoch.global": 0.8459193245778611, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978054775280899, "tokens_p.mean_in_band": 0.76171875, "tokens_rate.above_band": 0.9888888888888889, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011111111111111112 }, { "epoch": 1.422872340425532, "grad_norm": 133.72499691704502, "learning_rate": 1.679688307213748e-07, "loss": 0.3424, "step": 8560, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.5826612903225806, "success_rate.epoch.env.math": 0.9713322091062394, "success_rate.epoch.env.sat": 0.0898876404494382, "success_rate.epoch.env.science": 0.9508733624454149, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.585616594405511, "success_rate.epoch.global": 0.8460098291598409, "success_rate.window.env.logic": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975429975429976, "tokens_p.mean_in_band": 0.5405016447368421, "tokens_rate.above_band": 0.9771908763505402, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022809123649459785 }, { "epoch": 1.4237034574468086, "grad_norm": 93.43347688320038, "learning_rate": 1.6794755741932601e-07, "loss": 0.3283, "step": 8565, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.5826612903225806, "success_rate.epoch.env.math": 0.9713804713804713, "success_rate.epoch.env.sat": 0.0893854748603352, "success_rate.epoch.env.science": 0.9509803921568627, "success_rate.epoch.env.webshop": 0.4, "success_rate.epoch.env_macro_mean": 0.5764941513506543, "success_rate.epoch.global": 0.845866417561887, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9921518264840182, "tokens_p.mean_in_band": 0.6305803571428571, "tokens_rate.above_band": 0.8866396761133604, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11336032388663968 }, { "epoch": 1.424534574468085, "grad_norm": 30.845878216754365, "learning_rate": 1.679263055074101e-07, "loss": 0.2865, "step": 8570, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45544554455445546, "success_rate.epoch.env.logic": 0.5826612903225806, "success_rate.epoch.env.math": 0.9715242881072027, "success_rate.epoch.env.sat": 0.0893854748603352, "success_rate.epoch.env.science": 0.9510514865844815, "success_rate.epoch.env.webshop": 0.4, "success_rate.epoch.env_macro_mean": 0.5765136887283225, "success_rate.epoch.global": 0.8461179762182327, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9976851851851852, "tokens_p.mean_in_band": 0.44503348214285715, "tokens_rate.above_band": 0.9665871121718377, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03341288782816229 }, { "epoch": 1.4253656914893618, "grad_norm": 68.72044706609415, "learning_rate": 1.6790507502334396e-07, "loss": 0.2513, "step": 8575, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.5814889336016097, "success_rate.epoch.env.math": 0.9715242881072027, "success_rate.epoch.env.sat": 0.08888888888888889, "success_rate.epoch.env.science": 0.9511400651465798, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5859462710047513, "success_rate.epoch.global": 0.845974872033504, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9924175126903553, "tokens_p.mean_in_band": 0.6511182598039216, "tokens_rate.above_band": 0.9507722007722008, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04922779922779923 }, { "epoch": 1.4261968085106382, "grad_norm": 44.67211750077507, "learning_rate": 1.6788386600480658e-07, "loss": 0.2824, "step": 8580, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.5803212851405622, "success_rate.epoch.env.math": 0.9715242881072027, "success_rate.epoch.env.sat": 0.08888888888888889, "success_rate.epoch.env.science": 0.9512459371614301, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5858497458732788, "success_rate.epoch.global": 0.845993031358885, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945693597560976, "tokens_p.mean_in_band": 0.5940290178571429, "tokens_rate.above_band": 0.9590643274853801, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04093567251461988 }, { "epoch": 1.427027925531915, "grad_norm": 80.3944084089916, "learning_rate": 1.678626784894387e-07, "loss": 0.2373, "step": 8585, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.5803212851405622, "success_rate.epoch.env.math": 0.9715242881072027, "success_rate.epoch.env.sat": 0.08888888888888889, "success_rate.epoch.env.science": 0.9513513513513514, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5858593289814533, "success_rate.epoch.global": 0.8462073764787752, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9902573529411764, "tokens_p.mean_in_band": 0.8372395833333334, "tokens_rate.above_band": 0.9826589595375722, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017341040462427744 }, { "epoch": 1.4278590425531914, "grad_norm": 122.8034342934403, "learning_rate": 1.67841512514843e-07, "loss": 0.2955, "step": 8590, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.5811623246492986, "success_rate.epoch.env.math": 0.9716193656093489, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.9514038876889849, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.586406821380847, "success_rate.epoch.global": 0.8464566929133859, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.994410569105691, "tokens_p.mean_in_band": 0.76953125, "tokens_rate.above_band": 0.9879518072289156, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012048192771084338 }, { "epoch": 1.428690159574468, "grad_norm": 104.24034106569569, "learning_rate": 1.6782036811858386e-07, "loss": 0.3699, "step": 8595, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.582, "success_rate.epoch.env.math": 0.9716193656093489, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.9514563106796117, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5864877394118768, "success_rate.epoch.global": 0.8465987968533086, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9903645833333333, "tokens_p.mean_below_band": 1.4915713109076023e-10, "tokens_p.mean_in_band": 0.8546875, "tokens_rate.above_band": 0.975609756097561, "tokens_rate.below_band": 0.0040650406504065045, "tokens_rate.in_band": 0.02032520325203252 }, { "epoch": 1.4295212765957448, "grad_norm": 39.72259975094303, "learning_rate": 1.6779924533818745e-07, "loss": 0.2195, "step": 8600, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46078431372549017, "success_rate.epoch.env.logic": 0.582, "success_rate.epoch.env.math": 0.9716666666666667, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.9515781922525107, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5865031196509874, "success_rate.epoch.global": 0.8468822170900693, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9849837662337663, "tokens_p.mean_in_band": 0.8463541666666666, "tokens_rate.above_band": 0.9808917197452229, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01910828025477707 }, { "epoch": 1.4303523936170213, "grad_norm": 100.07232165750263, "learning_rate": 1.6777814421114155e-07, "loss": 0.3136, "step": 8605, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4563106796116505, "success_rate.epoch.env.logic": 0.5808383233532934, "success_rate.epoch.env.math": 0.9717607973421927, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.951647564469914, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5860056825721132, "success_rate.epoch.global": 0.8467035500230521, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977895480225989, "tokens_p.mean_in_band": 0.5431937172774869, "tokens_rate.above_band": 0.9586221837088388, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.041377816291161176 }, { "epoch": 1.4311835106382977, "grad_norm": 88.49918466277656, "learning_rate": 1.6775706477489542e-07, "loss": 0.1981, "step": 8610, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4563106796116505, "success_rate.epoch.env.logic": 0.5808383233532934, "success_rate.epoch.env.math": 0.971947194719472, "success_rate.epoch.env.sat": 0.09392265193370165, "success_rate.epoch.env.science": 0.9517340007150519, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5860304856286965, "success_rate.epoch.global": 0.8470209339774557, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9852642276422764, "tokens_p.mean_in_band": 0.88125, "tokens_rate.above_band": 0.9609375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0390625 }, { "epoch": 1.4320146276595744, "grad_norm": 17.35600566267856, "learning_rate": 1.6773600706685992e-07, "loss": 0.3189, "step": 8615, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.44954128440366975, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5808383233532934, "success_rate.epoch.env.math": 0.9720394736842105, "success_rate.epoch.env.sat": 0.09340659340659341, "success_rate.epoch.env.science": 0.9517857142857142, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.586061137563146, "success_rate.epoch.global": 0.8468427095292766, "success_rate.window.env.agentgym:alfworld": 0.6666666666666666, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9923309682804674, "tokens_p.mean_in_band": 0.6994392641129032, "tokens_rate.above_band": 0.9062027231467473, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09379727685325265 }, { "epoch": 1.4328457446808511, "grad_norm": 63.43731999211111, "learning_rate": 1.6771497112440735e-07, "loss": 0.2726, "step": 8620, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.44954128440366975, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5808383233532934, "success_rate.epoch.env.math": 0.9720394736842105, "success_rate.epoch.env.sat": 0.09340659340659341, "success_rate.epoch.env.science": 0.9512455516014234, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5860120318645742, "success_rate.epoch.global": 0.8467353951890034, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9913194444444444, "tokens_p.mean_in_band": 0.6731770833333334, "tokens_rate.above_band": 0.9473684210526315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05263157894736842 }, { "epoch": 1.4336768617021276, "grad_norm": 54.61369506875519, "learning_rate": 1.6769395698487138e-07, "loss": 0.4171, "step": 8625, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5808383233532934, "success_rate.epoch.env.math": 0.9721311475409836, "success_rate.epoch.env.sat": 0.09340659340659341, "success_rate.epoch.env.science": 0.9513321492007105, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5861108898193778, "success_rate.epoch.global": 0.846822130772748, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977897838899804, "tokens_p.mean_in_band": 0.7161458333333334, "tokens_rate.above_band": 0.9883495145631068, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011650485436893204 }, { "epoch": 1.4345079787234043, "grad_norm": 304.3190549004163, "learning_rate": 1.6767296468554685e-07, "loss": 0.2495, "step": 8630, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5816733067729084, "success_rate.epoch.env.math": 0.9722222222222222, "success_rate.epoch.env.sat": 0.09289617486338798, "success_rate.epoch.env.science": 0.951418439716312, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5861565198260369, "success_rate.epoch.global": 0.8469085101528633, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959177927927928, "tokens_p.mean_in_band": 0.6796875, "tokens_rate.above_band": 0.9652173913043478, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034782608695652174 }, { "epoch": 1.4353390957446808, "grad_norm": 312.434319232639, "learning_rate": 1.6765199426369007e-07, "loss": 0.2669, "step": 8635, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5816733067729084, "success_rate.epoch.env.math": 0.9722675367047309, "success_rate.epoch.env.sat": 0.09289617486338798, "success_rate.epoch.env.science": 0.9515729939908095, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5861746897130374, "success_rate.epoch.global": 0.8472569997723651, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9910102739726028, "tokens_p.mean_in_band": 0.8854166666666666, "tokens_rate.above_band": 0.9798657718120806, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020134228187919462 }, { "epoch": 1.4361702127659575, "grad_norm": 424.42794893225584, "learning_rate": 1.676310457565183e-07, "loss": 0.2701, "step": 8640, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5816733067729084, "success_rate.epoch.env.math": 0.9723127035830619, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.9516413695728909, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5861391144352965, "success_rate.epoch.global": 0.8472380086383269, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9874625748502994, "tokens_p.mean_below_band": 3.0547380447387695e-07, "tokens_p.mean_in_band": 0.669677734375, "tokens_rate.above_band": 0.907608695652174, "tokens_rate.below_band": 0.005434782608695652, "tokens_rate.in_band": 0.08695652173913043 }, { "epoch": 1.437001329787234, "grad_norm": 46.119291957804386, "learning_rate": 1.676101192012101e-07, "loss": 0.3312, "step": 8645, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5816733067729084, "success_rate.epoch.env.math": 0.9723127035830619, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.9516925246826516, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5861437648998202, "success_rate.epoch.global": 0.8473421172194457, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958314167433303, "tokens_p.mean_in_band": 0.6761762640449438, "tokens_rate.above_band": 0.9243197278911565, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07568027210884354 }, { "epoch": 1.4378324468085106, "grad_norm": 255.95866811533935, "learning_rate": 1.675892146349049e-07, "loss": 0.2728, "step": 8650, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5813492063492064, "success_rate.epoch.env.math": 0.9723127035830619, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.9518114667604644, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.586125114141103, "success_rate.epoch.global": 0.8474268873271367, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966023489932886, "tokens_p.mean_in_band": 0.6751302083333334, "tokens_rate.above_band": 0.9841479524438573, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015852047556142668 }, { "epoch": 1.4386635638297873, "grad_norm": 29.06535848688785, "learning_rate": 1.6756833209470323e-07, "loss": 0.2309, "step": 8655, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5798816568047337, "success_rate.epoch.env.math": 0.9724473257698542, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.9518453427065027, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5860070185582266, "success_rate.epoch.global": 0.8472505091649695, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9978480538922155, "tokens_p.mean_in_band": 0.5659327651515151, "tokens_rate.above_band": 0.9758948137326515, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02410518626734843 }, { "epoch": 1.4394946808510638, "grad_norm": 240.5160701591656, "learning_rate": 1.6754747161766648e-07, "loss": 0.2569, "step": 8660, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5798816568047337, "success_rate.epoch.env.math": 0.9724473257698542, "success_rate.epoch.env.sat": 0.09239130434782608, "success_rate.epoch.env.science": 0.9519298245614035, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5860146987268539, "success_rate.epoch.global": 0.8474231464737794, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938063063063063, "tokens_p.mean_in_band": 0.6397758152173914, "tokens_rate.above_band": 0.9666182873730044, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033381712626995644 }, { "epoch": 1.4403257978723405, "grad_norm": 39.62803208176381, "learning_rate": 1.6752663324081687e-07, "loss": 0.2168, "step": 8665, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5807086614173228, "success_rate.epoch.env.math": 0.9725363489499192, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.9519971969166082, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5860586976079379, "success_rate.epoch.global": 0.8474729241877257, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9920528017241379, "tokens_p.mean_in_band": 0.6429227941176471, "tokens_rate.above_band": 0.9317269076305221, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06827309236947791 }, { "epoch": 1.441156914893617, "grad_norm": 61.07512049302331, "learning_rate": 1.6750581700113742e-07, "loss": 0.2246, "step": 8670, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5803921568627451, "success_rate.epoch.env.math": 0.9725806451612903, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.952047602380119, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5860385337097838, "success_rate.epoch.global": 0.8474538080216314, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9962631406044679, "tokens_p.mean_in_band": 0.6515066964285714, "tokens_rate.above_band": 0.9819354838709677, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01806451612903226 }, { "epoch": 1.4419880319148937, "grad_norm": 67.94565280718675, "learning_rate": 1.6748502293557184e-07, "loss": 0.1965, "step": 8675, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5803921568627451, "success_rate.epoch.env.math": 0.9726247987117552, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.9517988124345093, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5860199304011342, "success_rate.epoch.global": 0.8474690663667042, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9913321167883211, "tokens_p.mean_below_band": 1.955777406692505e-08, "tokens_p.mean_in_band": 0.75, "tokens_rate.above_band": 0.9647887323943662, "tokens_rate.below_band": 0.007042253521126761, "tokens_rate.in_band": 0.028169014084507043 }, { "epoch": 1.4428191489361701, "grad_norm": 164.19239948795624, "learning_rate": 1.6746425108102456e-07, "loss": 0.2735, "step": 8680, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5792563600782779, "success_rate.epoch.env.math": 0.9726688102893891, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.9519163763066202, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5859313648252503, "success_rate.epoch.global": 0.8475527615626404, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964849768875192, "tokens_p.mean_in_band": 0.6681385869565217, "tokens_rate.above_band": 0.9657738095238095, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03422619047619048 }, { "epoch": 1.4436502659574468, "grad_norm": 55.10663821114271, "learning_rate": 1.6744350147436046e-07, "loss": 0.2592, "step": 8685, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5789473684210527, "success_rate.epoch.env.math": 0.9726688102893891, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.9516689847009736, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5858807845286257, "success_rate.epoch.global": 0.8473778574630211, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973628691983122, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0125 }, { "epoch": 1.4444813829787235, "grad_norm": 29.708229339774526, "learning_rate": 1.6742277415240505e-07, "loss": 0.2786, "step": 8690, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5789473684210527, "success_rate.epoch.env.math": 0.9726688102893891, "success_rate.epoch.env.sat": 0.0918918918918919, "success_rate.epoch.env.science": 0.9518197573656846, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5858944911345084, "success_rate.epoch.global": 0.8476850816372177, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9902673545966229, "tokens_p.mean_in_band": 0.6785714285714286, "tokens_rate.above_band": 0.8541666666666666, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14583333333333334 }, { "epoch": 1.4453125, "grad_norm": 39.03621860273987, "learning_rate": 1.6740206915194423e-07, "loss": 0.3147, "step": 8695, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.45045045045045046, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5766990291262136, "success_rate.epoch.env.math": 0.9728, "success_rate.epoch.env.sat": 0.0913978494623656, "success_rate.epoch.env.science": 0.9518531347419467, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5856601443492818, "success_rate.epoch.global": 0.8472873409243135, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9974712323682257, "tokens_p.mean_in_band": 0.65078125, "tokens_rate.above_band": 0.9573560767590619, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042643923240938165 }, { "epoch": 1.4461436170212765, "grad_norm": 225.77303993765307, "learning_rate": 1.6738138650972434e-07, "loss": 0.2559, "step": 8700, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5775193798449613, "success_rate.epoch.env.math": 0.9728867623604466, "success_rate.epoch.env.sat": 0.0913978494623656, "success_rate.epoch.env.science": 0.9518864659051575, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5853800139147842, "success_rate.epoch.global": 0.8472686733556298, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967648678414097, "tokens_p.mean_in_band": 0.7265625, "tokens_rate.above_band": 0.9826839826839827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017316017316017316 }, { "epoch": 1.4469747340425532, "grad_norm": 680.9468878174185, "learning_rate": 1.6736072626245194e-07, "loss": 0.3047, "step": 8705, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5775193798449613, "success_rate.epoch.env.math": 0.972972972972973, "success_rate.epoch.env.sat": 0.09090909090909091, "success_rate.epoch.env.science": 0.951969592259848, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5853509755887791, "success_rate.epoch.global": 0.8473180503004674, "success_rate.window.env.math": 0.75, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9914596273291926, "tokens_p.mean_below_band": 8.149072527885437e-09, "tokens_p.mean_in_band": 0.5965073529411765, "tokens_rate.above_band": 0.8994413407821229, "tokens_rate.below_band": 0.00558659217877095, "tokens_rate.in_band": 0.09497206703910614 }, { "epoch": 1.4478058510638299, "grad_norm": 62.860235008006676, "learning_rate": 1.6734008844679396e-07, "loss": 0.4091, "step": 8710, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.4424778761061947, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5783365570599613, "success_rate.epoch.env.math": 0.972972972972973, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.951986183074266, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5854611644102378, "success_rate.epoch.global": 0.8470431302801245, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5666666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9972527472527473, "tokens_p.mean_in_band": 0.6467578125, "tokens_rate.above_band": 0.9667994687915007, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033200531208499334 }, { "epoch": 1.4486369680851063, "grad_norm": 55.9695147997463, "learning_rate": 1.6731947309937744e-07, "loss": 0.2228, "step": 8715, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5772200772200772, "success_rate.epoch.env.math": 0.972972972972973, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.9520358868184955, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5850113315944395, "success_rate.epoch.global": 0.8467688207861426, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952417695473251, "tokens_p.mean_in_band": 0.6136067708333334, "tokens_rate.above_band": 0.9818181818181818, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01818181818181818 }, { "epoch": 1.449468085106383, "grad_norm": 45.446571894704654, "learning_rate": 1.672988802567896e-07, "loss": 0.1526, "step": 8720, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4519230769230769, "success_rate.epoch.env.logic": 0.5761078998073218, "success_rate.epoch.env.math": 0.973015873015873, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.9521349862258953, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5849231335978528, "success_rate.epoch.global": 0.8468188871647085, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963672969187675, "tokens_p.mean_in_band": 0.6397372159090909, "tokens_rate.above_band": 0.9848275862068966, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015172413793103448 }, { "epoch": 1.4502992021276595, "grad_norm": 47.457035107552926, "learning_rate": 1.6727830995557766e-07, "loss": 0.3986, "step": 8725, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.5769230769230769, "success_rate.epoch.env.math": 0.9731012658227848, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.952200825309491, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5854855144365839, "success_rate.epoch.global": 0.8470900641734898, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99609375, "tokens_p.mean_below_band": 5.893525667488575e-10, "tokens_p.mean_in_band": 0.8854166666666666, "tokens_rate.above_band": 0.9959654178674352, "tokens_rate.below_band": 0.0005763688760806917, "tokens_rate.in_band": 0.00345821325648415 }, { "epoch": 1.4511303191489362, "grad_norm": 91.4597482139096, "learning_rate": 1.6725776223224885e-07, "loss": 0.2718, "step": 8730, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.5769230769230769, "success_rate.epoch.env.math": 0.9731012658227848, "success_rate.epoch.env.sat": 0.09523809523809523, "success_rate.epoch.env.science": 0.9519395811877789, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5854617649709737, "success_rate.epoch.global": 0.8470380194518126, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9868683510638298, "tokens_p.mean_below_band": 1.8416358216200024e-08, "tokens_p.mean_in_band": 0.5908203125, "tokens_rate.above_band": 0.912621359223301, "tokens_rate.below_band": 0.009708737864077669, "tokens_rate.in_band": 0.07766990291262135 }, { "epoch": 1.4519614361702127, "grad_norm": 22.796198905348415, "learning_rate": 1.6723723712327042e-07, "loss": 0.2272, "step": 8735, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.5769230769230769, "success_rate.epoch.env.math": 0.9731012658227848, "success_rate.epoch.env.sat": 0.09473684210526316, "success_rate.epoch.env.science": 0.9520219328307059, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5854236830173459, "success_rate.epoch.global": 0.8470198675496688, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9911111111111112, "tokens_p.mean_in_band": 0.72216796875, "tokens_rate.above_band": 0.8754863813229572, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1245136186770428 }, { "epoch": 1.4527925531914894, "grad_norm": 54.05771872340115, "learning_rate": 1.672167346650693e-07, "loss": 0.3134, "step": 8740, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45714285714285713, "success_rate.epoch.env.logic": 0.5769230769230769, "success_rate.epoch.env.math": 0.973186119873817, "success_rate.epoch.env.sat": 0.09473684210526316, "success_rate.epoch.env.science": 0.9521040027369141, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5854388579225497, "success_rate.epoch.global": 0.8472558959664976, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9901889534883721, "tokens_p.mean_in_band": 0.8776041666666666, "tokens_rate.above_band": 0.9662921348314607, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033707865168539325 }, { "epoch": 1.453623670212766, "grad_norm": 120.17919160073394, "learning_rate": 1.6719625489403243e-07, "loss": 0.2402, "step": 8745, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4528301886792453, "success_rate.epoch.env.logic": 0.5777351247600768, "success_rate.epoch.env.math": 0.973186119873817, "success_rate.epoch.env.sat": 0.09473684210526316, "success_rate.epoch.env.science": 0.9521857923497268, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5851280551031134, "success_rate.epoch.global": 0.8472711267605634, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925246891002194, "tokens_p.mean_in_band": 0.5591957885304659, "tokens_rate.above_band": 0.830498177399757, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16950182260024302 }, { "epoch": 1.4544547872340425, "grad_norm": 100.65369323379176, "learning_rate": 1.6717579784650635e-07, "loss": 0.1957, "step": 8750, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45794392523364486, "success_rate.epoch.env.logic": 0.578544061302682, "success_rate.epoch.env.math": 0.973186119873817, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.9522184300341296, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.585579734073672, "success_rate.epoch.global": 0.847032967032967, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9948308270676691, "tokens_p.mean_in_band": 0.697985197368421, "tokens_rate.above_band": 0.9130434782608695, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08695652173913043 }, { "epoch": 1.4552859042553192, "grad_norm": 63.25419099890285, "learning_rate": 1.671553635587973e-07, "loss": 0.3672, "step": 8755, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45794392523364486, "success_rate.epoch.env.logic": 0.578544061302682, "success_rate.epoch.env.math": 0.973186119873817, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.9523323118828737, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5855900869690124, "success_rate.epoch.global": 0.847267939433838, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9884982638888888, "tokens_p.mean_in_band": 0.802734375, "tokens_rate.above_band": 0.9473684210526315, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05263157894736842 }, { "epoch": 1.4561170212765957, "grad_norm": 67.16979030932264, "learning_rate": 1.6713495206717115e-07, "loss": 0.2948, "step": 8760, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45794392523364486, "success_rate.epoch.env.logic": 0.578544061302682, "success_rate.epoch.env.math": 0.97339593114241, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.9524133242692047, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5856165254830963, "success_rate.epoch.global": 0.8476023647908911, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9909957627118644, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4569481382978724, "grad_norm": 49.82950088176118, "learning_rate": 1.6711456340785325e-07, "loss": 0.1949, "step": 8765, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45794392523364486, "success_rate.epoch.env.logic": 0.578544061302682, "success_rate.epoch.env.math": 0.9734789391575663, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.9525101763907734, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5856328764046168, "success_rate.epoch.global": 0.8478688524590164, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9852941176470589, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4577792553191489, "grad_norm": 1.2199100707662283, "learning_rate": 1.6709419761702856e-07, "loss": 0.2266, "step": 8770, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4537037037037037, "success_rate.epoch.env.logic": 0.578544061302682, "success_rate.epoch.env.math": 0.973561430793157, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.9525262800949474, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5852563649327825, "success_rate.epoch.global": 0.8477833588119676, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9914198234121333, "tokens_p.mean_below_band": 3.0547380447387695e-07, "tokens_p.mean_in_band": 0.49826959978070173, "tokens_rate.above_band": 0.793625678119349, "tokens_rate.below_band": 0.0002260397830018083, "tokens_rate.in_band": 0.20614828209764918 }, { "epoch": 1.4586103723404256, "grad_norm": 125.04770250388152, "learning_rate": 1.670738547308413e-07, "loss": 0.3216, "step": 8775, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45871559633027525, "success_rate.epoch.env.logic": 0.578544061302682, "success_rate.epoch.env.math": 0.9736024844720497, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.9525745257452575, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5857201096560347, "success_rate.epoch.global": 0.8479493891797557, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9985160013764625, "tokens_p.mean_in_band": 0.23349216331269348, "tokens_rate.above_band": 0.8181306306306306, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18186936936936937 }, { "epoch": 1.4594414893617023, "grad_norm": 37.293791066211455, "learning_rate": 1.670535347853951e-07, "loss": 0.2724, "step": 8780, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45871559633027525, "success_rate.epoch.env.logic": 0.5801526717557252, "success_rate.epoch.env.math": 0.9736434108527132, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.952638700947226, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5858759016592779, "success_rate.epoch.global": 0.8481812241341755, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917763157894737, "tokens_p.mean_in_band": 0.8169642857142857, "tokens_rate.above_band": 0.976027397260274, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023972602739726026 }, { "epoch": 1.4602726063829787, "grad_norm": 38.87283598834505, "learning_rate": 1.6703323781675297e-07, "loss": 0.3142, "step": 8785, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45871559633027525, "success_rate.epoch.env.logic": 0.5798479087452472, "success_rate.epoch.env.math": 0.9736434108527132, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.952686718485975, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.585852561161848, "success_rate.epoch.global": 0.8481288076588338, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9941100303183426, "tokens_p.mean_below_band": 7.003545761108398e-07, "tokens_p.mean_in_band": 0.4973266786798179, "tokens_rate.above_band": 0.8568954319116692, "tokens_rate.below_band": 0.00043299415457891317, "tokens_rate.in_band": 0.1426715739337519 }, { "epoch": 1.4611037234042552, "grad_norm": 54.46552396007076, "learning_rate": 1.6701296386093698e-07, "loss": 0.4159, "step": 8790, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5798479087452472, "success_rate.epoch.env.math": 0.9736434108527132, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.9527824620573356, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5854821613242608, "success_rate.epoch.global": 0.8481425157505974, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944903581267218, "tokens_p.mean_below_band": 1.3535221417744955e-07, "tokens_p.mean_in_band": 0.48112277809633025, "tokens_rate.above_band": 0.9009476534296029, "tokens_rate.below_band": 0.0006768953068592057, "tokens_rate.in_band": 0.0983754512635379 }, { "epoch": 1.461934840425532, "grad_norm": 268.13693865907, "learning_rate": 1.6699271295392846e-07, "loss": 0.2378, "step": 8795, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43859649122807015, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5787476280834914, "success_rate.epoch.env.math": 0.9737654320987654, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.9528301886792453, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5853975674339158, "success_rate.epoch.global": 0.8481561822125814, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971122112211221, "tokens_p.mean_in_band": 0.5791015625, "tokens_rate.above_band": 0.9588607594936709, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04113924050632911 }, { "epoch": 1.4627659574468086, "grad_norm": 264.3785545733138, "learning_rate": 1.6697248513166778e-07, "loss": 0.3537, "step": 8800, "success_rate.epoch.env.abd": 0.5925925925925926, "success_rate.epoch.env.agentgym:alfworld": 0.43478260869565216, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5787476280834914, "success_rate.epoch.env.math": 0.9737654320987654, "success_rate.epoch.env.sat": 0.09375, "success_rate.epoch.env.science": 0.9525731584258325, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5850274844533858, "success_rate.epoch.global": 0.8479202772963604, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9926202623906706, "tokens_p.mean_below_band": 1.0089706847793423e-12, "tokens_p.mean_in_band": 0.7724609375, "tokens_rate.above_band": 0.9581005586592178, "tokens_rate.below_band": 0.002793296089385475, "tokens_rate.in_band": 0.03910614525139665 }, { "epoch": 1.463597074468085, "grad_norm": 81.86714619302373, "learning_rate": 1.6695228043005444e-07, "loss": 0.2204, "step": 8805, "success_rate.epoch.env.abd": 0.6071428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.43478260869565216, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5795454545454546, "success_rate.epoch.env.math": 0.9737654320987654, "success_rate.epoch.env.sat": 0.09326424870466321, "success_rate.epoch.env.science": 0.9526527871054399, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5863858452167038, "success_rate.epoch.global": 0.8479671280276817, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9935286103542235, "tokens_p.mean_in_band": 0.7521484375, "tokens_rate.above_band": 0.9483204134366925, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05167958656330749 }, { "epoch": 1.4644281914893618, "grad_norm": 184.7918911546684, "learning_rate": 1.6693209888494674e-07, "loss": 0.2703, "step": 8810, "success_rate.epoch.env.abd": 0.6071428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.4396551724137931, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5795454545454546, "success_rate.epoch.env.math": 0.9738058551617874, "success_rate.epoch.env.sat": 0.09326424870466321, "success_rate.epoch.env.science": 0.9527321488434461, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5868396950820828, "success_rate.epoch.global": 0.8481969337076225, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976421734234234, "tokens_p.mean_in_band": 0.7494419642857143, "tokens_rate.above_band": 0.9844789356984479, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015521064301552107 }, { "epoch": 1.4652593085106382, "grad_norm": 17.43720399263593, "learning_rate": 1.6691194053216205e-07, "loss": 0.3211, "step": 8815, "success_rate.epoch.env.abd": 0.6071428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.4396551724137931, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5792452830188679, "success_rate.epoch.env.math": 0.9738461538461538, "success_rate.epoch.env.sat": 0.09326424870466321, "success_rate.epoch.env.science": 0.9528270324523251, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.58682469606087, "success_rate.epoch.global": 0.8482758620689655, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953687673130194, "tokens_p.mean_in_band": 0.505078125, "tokens_rate.above_band": 0.9601063829787234, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0398936170212766 }, { "epoch": 1.466090425531915, "grad_norm": 69.06664856560502, "learning_rate": 1.668918054074765e-07, "loss": 0.2096, "step": 8820, "success_rate.epoch.env.abd": 0.6071428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.4396551724137931, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45045045045045046, "success_rate.epoch.env.logic": 0.5800376647834274, "success_rate.epoch.env.math": 0.9738863287250384, "success_rate.epoch.env.sat": 0.09326424870466321, "success_rate.epoch.env.science": 0.9529058116232465, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.586535271671721, "success_rate.epoch.global": 0.8483218588640276, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9917002012072434, "tokens_p.mean_below_band": 6.891787052154541e-07, "tokens_p.mean_in_band": 0.6647329757462687, "tokens_rate.above_band": 0.8804251550044287, "tokens_rate.below_band": 0.0008857395925597874, "tokens_rate.in_band": 0.11868910540301152 }, { "epoch": 1.4669215425531914, "grad_norm": 74.35927383568033, "learning_rate": 1.6687169354662497e-07, "loss": 0.1666, "step": 8825, "success_rate.epoch.env.abd": 0.6071428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.4396551724137931, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45045045045045046, "success_rate.epoch.env.logic": 0.5789473684210527, "success_rate.epoch.env.math": 0.9739263803680982, "success_rate.epoch.env.sat": 0.09326424870466321, "success_rate.epoch.env.science": 0.9529843281093698, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5864469327414308, "success_rate.epoch.global": 0.8483351235230935, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944362017804155, "tokens_p.mean_in_band": 0.673583984375, "tokens_rate.above_band": 0.9768115942028985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02318840579710145 }, { "epoch": 1.467752659574468, "grad_norm": 139.5242283709667, "learning_rate": 1.6685160498530113e-07, "loss": 0.3972, "step": 8830, "success_rate.epoch.env.abd": 0.6071428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45045045045045046, "success_rate.epoch.env.logic": 0.5797373358348968, "success_rate.epoch.env.math": 0.9739263803680982, "success_rate.epoch.env.sat": 0.09278350515463918, "success_rate.epoch.env.science": 0.9530625832223701, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.586140545692382, "success_rate.epoch.global": 0.8481664164700836, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9913269731136166, "tokens_p.mean_in_band": 0.6199721534653465, "tokens_rate.above_band": 0.8509225092250923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14907749077490776 }, { "epoch": 1.4685837765957448, "grad_norm": 77.31867551880306, "learning_rate": 1.668315397591572e-07, "loss": 0.2762, "step": 8835, "success_rate.epoch.env.abd": 0.6071428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45045045045045046, "success_rate.epoch.env.logic": 0.5797373358348968, "success_rate.epoch.env.math": 0.9739663093415007, "success_rate.epoch.env.sat": 0.09278350515463918, "success_rate.epoch.env.science": 0.953125, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5861498498515668, "success_rate.epoch.global": 0.8483290488431876, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960516178736518, "tokens_p.mean_in_band": 0.5710227272727273, "tokens_rate.above_band": 0.9833333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016666666666666666 }, { "epoch": 1.4694148936170213, "grad_norm": 74.17165133158827, "learning_rate": 1.6681149790380409e-07, "loss": 0.3561, "step": 8840, "success_rate.epoch.env.abd": 0.6071428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45045045045045046, "success_rate.epoch.env.logic": 0.5805243445692884, "success_rate.epoch.env.math": 0.9740061162079511, "success_rate.epoch.env.sat": 0.09278350515463918, "success_rate.epoch.env.science": 0.953187250996016, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5862306740876448, "success_rate.epoch.global": 0.8485237483953787, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940378289473685, "tokens_p.mean_in_band": 0.66796875, "tokens_rate.above_band": 0.9470404984423676, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0529595015576324 }, { "epoch": 1.4702460106382977, "grad_norm": 50.42447448470959, "learning_rate": 1.6679147945481116e-07, "loss": 0.3303, "step": 8845, "success_rate.epoch.env.abd": 0.6071428571428571, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5805243445692884, "success_rate.epoch.env.math": 0.9740458015267176, "success_rate.epoch.env.sat": 0.09278350515463918, "success_rate.epoch.env.science": 0.9529489728296885, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5858469948267867, "success_rate.epoch.global": 0.8483554036736437, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9943889371560961, "tokens_p.mean_below_band": 4.9715708883013576e-08, "tokens_p.mean_in_band": 0.511228491902834, "tokens_rate.above_band": 0.8743985819194733, "tokens_rate.below_band": 0.0005064573309698658, "tokens_rate.in_band": 0.12509496074955684 }, { "epoch": 1.4710771276595744, "grad_norm": 100.25207053452297, "learning_rate": 1.6677148444770627e-07, "loss": 0.4462, "step": 8850, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5813084112149532, "success_rate.epoch.env.math": 0.9740458015267176, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.9529956967891426, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5839759963874266, "success_rate.epoch.global": 0.8481228668941979, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969135802469136, "tokens_p.mean_below_band": 2.814663781060113e-07, "tokens_p.mean_in_band": 0.10443947885677858, "tokens_rate.above_band": 0.27390101444820164, "tokens_rate.below_band": 0.0027666769136181985, "tokens_rate.in_band": 0.7233323086381801 }, { "epoch": 1.4719082446808511, "grad_norm": 85.29556765683408, "learning_rate": 1.6675151291797562e-07, "loss": 0.3214, "step": 8855, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5813084112149532, "success_rate.epoch.env.math": 0.974124809741248, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.9530578512396695, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5839888293569773, "success_rate.epoch.global": 0.8483170004260758, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974102209944752, "tokens_p.mean_in_band": 0.58203125, "tokens_rate.above_band": 0.9890710382513661, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01092896174863388 }, { "epoch": 1.4727393617021276, "grad_norm": 153.86705675307914, "learning_rate": 1.6673156490106382e-07, "loss": 0.4592, "step": 8860, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.582089552238806, "success_rate.epoch.env.math": 0.974124809741248, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.9531353135313532, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5840668842038442, "success_rate.epoch.global": 0.8485106382978723, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980090027700831, "tokens_p.mean_in_band": 0.5875, "tokens_rate.above_band": 0.9537648612945839, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.046235138705416116 }, { "epoch": 1.4735704787234043, "grad_norm": 215.51987325878264, "learning_rate": 1.667116404323737e-07, "loss": 0.2616, "step": 8865, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.582089552238806, "success_rate.epoch.env.math": 0.9741641337386018, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.9531662269129287, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5840732694201104, "success_rate.epoch.global": 0.8486072719540718, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9868197278911565, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.4744015957446808, "grad_norm": 57.22878032157092, "learning_rate": 1.666917395472663e-07, "loss": 0.1699, "step": 8870, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5828677839851024, "success_rate.epoch.env.math": 0.9741641337386018, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.9532125205930807, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5841482262770603, "success_rate.epoch.global": 0.8487359252177608, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9923321759259259, "tokens_p.mean_below_band": 7.851497230149107e-13, "tokens_p.mean_in_band": 0.798828125, "tokens_rate.above_band": 0.96, "tokens_rate.below_band": 0.0044444444444444444, "tokens_rate.in_band": 0.035555555555555556 }, { "epoch": 1.4752327127659575, "grad_norm": 60.02260851710756, "learning_rate": 1.6667186228106086e-07, "loss": 0.328, "step": 8875, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5825602968460112, "success_rate.epoch.env.math": 0.9742813918305597, "success_rate.epoch.env.sat": 0.09230769230769231, "success_rate.epoch.env.science": 0.9532741033234616, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5841365311573555, "success_rate.epoch.global": 0.8488125530110263, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951230492196879, "tokens_p.mean_in_band": 0.621337890625, "tokens_rate.above_band": 0.9811542991755006, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01884570082449941 }, { "epoch": 1.476063829787234, "grad_norm": 27.937648557803165, "learning_rate": 1.666520086690346e-07, "loss": 0.2021, "step": 8880, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5833333333333334, "success_rate.epoch.env.math": 0.974320241691843, "success_rate.epoch.env.sat": 0.09693877551020408, "success_rate.epoch.env.science": 0.9533508541392904, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5846383239179871, "success_rate.epoch.global": 0.8490685859441152, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994, "tokens_p.mean_below_band": 2.1047890186309814e-07, "tokens_p.mean_in_band": 0.810546875, "tokens_rate.above_band": 0.9727626459143969, "tokens_rate.below_band": 0.0038910505836575876, "tokens_rate.in_band": 0.023346303501945526 }, { "epoch": 1.4768949468085106, "grad_norm": 53.5136228833275, "learning_rate": 1.6663217874642284e-07, "loss": 0.1454, "step": 8885, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5833333333333334, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.10152284263959391, "success_rate.epoch.env.science": 0.9534273532305674, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5850655329077868, "success_rate.epoch.global": 0.8492919044599451, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9915865384615384, "tokens_p.mean_in_band": 0.72265625, "tokens_rate.above_band": 0.9891304347826086, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010869565217391304 }, { "epoch": 1.4777260638297873, "grad_norm": 174.38402611895302, "learning_rate": 1.6661237254841885e-07, "loss": 0.219, "step": 8890, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5833333333333334, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.10152284263959391, "success_rate.epoch.env.science": 0.953222113182859, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5850468747216315, "success_rate.epoch.global": 0.8493353028064993, "success_rate.window.env.science": 0.9, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9877083333333333, "tokens_p.mean_below_band": 6.845220923423767e-08, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9803921568627451, "tokens_rate.below_band": 0.006535947712418301, "tokens_rate.in_band": 0.013071895424836602 }, { "epoch": 1.4785571808510638, "grad_norm": 159.8681528008941, "learning_rate": 1.6659259011017374e-07, "loss": 0.4327, "step": 8895, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4358974358974359, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5833333333333334, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.10606060606060606, "success_rate.epoch.env.science": 0.9533137446947437, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5854677288064404, "success_rate.epoch.global": 0.8495575221238938, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972789115646259, "tokens_p.mean_in_band": 0.6131628787878788, "tokens_rate.above_band": 0.95703125, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04296875 }, { "epoch": 1.4793882978723405, "grad_norm": 115.93399448210958, "learning_rate": 1.6657283146679654e-07, "loss": 0.3512, "step": 8900, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5830258302583026, "success_rate.epoch.env.math": 0.9743975903614458, "success_rate.epoch.env.sat": 0.10552763819095477, "success_rate.epoch.env.science": 0.9533594259621657, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5858335794000286, "success_rate.epoch.global": 0.8493899873790492, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9959577114427861, "tokens_p.mean_in_band": 0.5632267441860465, "tokens_rate.above_band": 0.9655724579663731, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0344275420336269 }, { "epoch": 1.480219414893617, "grad_norm": 60.08968730788985, "learning_rate": 1.66553096653354e-07, "loss": 0.2483, "step": 8905, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5845588235294118, "success_rate.epoch.env.math": 0.9743975903614458, "success_rate.epoch.env.sat": 0.10552763819095477, "success_rate.epoch.env.science": 0.9530791788856305, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5859474654177171, "success_rate.epoch.global": 0.8493380962387056, "success_rate.window.env.logic": 0.75, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5166666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9963855421686747, "tokens_p.mean_below_band": 2.8405338525772095e-08, "tokens_p.mean_in_band": 0.6160300925925926, "tokens_rate.above_band": 0.9673659673659674, "tokens_rate.below_band": 0.0011655011655011655, "tokens_rate.in_band": 0.03146853146853147 }, { "epoch": 1.4810505319148937, "grad_norm": 102.70127530710973, "learning_rate": 1.6653338570487053e-07, "loss": 0.28, "step": 8910, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5834862385321101, "success_rate.epoch.env.math": 0.9744360902255639, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9531554977228367, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5858124286461778, "success_rate.epoch.global": 0.8491713866163205, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9927014802631579, "tokens_p.mean_in_band": 0.7341432733050848, "tokens_rate.above_band": 0.9115442278860569, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08845577211394302 }, { "epoch": 1.4818816489361701, "grad_norm": 71.41499388757578, "learning_rate": 1.6651369865632824e-07, "loss": 0.2772, "step": 8915, "success_rate.epoch.env.abd": 0.5862068965517241, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5834862385321101, "success_rate.epoch.env.math": 0.9744744744744744, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9532467532467532, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5858242140800712, "success_rate.epoch.global": 0.84939254294093, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954315707620529, "tokens_p.mean_in_band": 0.6552734375, "tokens_rate.above_band": 0.981679389312977, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0183206106870229 }, { "epoch": 1.4827127659574468, "grad_norm": 421.47318922803146, "learning_rate": 1.6649403554266688e-07, "loss": 0.4243, "step": 8920, "success_rate.epoch.env.abd": 0.5666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5834862385321101, "success_rate.epoch.env.math": 0.9745508982035929, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9532922478105741, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5840589130262424, "success_rate.epoch.global": 0.8493723849372385, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9929022082018928, "tokens_p.mean_in_band": 0.20500444231820666, "tokens_rate.above_band": 0.5097829000268025, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.4902170999731975 }, { "epoch": 1.4835438829787235, "grad_norm": 82.65505652781167, "learning_rate": 1.6647439639878363e-07, "loss": 0.2155, "step": 8925, "success_rate.epoch.env.abd": 0.5666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5834862385321101, "success_rate.epoch.env.math": 0.9745889387144993, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.953307392996109, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5840637480895551, "success_rate.epoch.global": 0.849435382685069, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9894957983193278, "tokens_p.mean_below_band": 1.0277290130034089e-10, "tokens_rate.above_band": 0.9916666666666667, "tokens_rate.below_band": 0.008333333333333333, "tokens_rate.in_band": 0.0 }, { "epoch": 1.484375, "grad_norm": 119.68829124966119, "learning_rate": 1.664547812595331e-07, "loss": 0.2534, "step": 8930, "success_rate.epoch.env.abd": 0.5666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.44642857142857145, "success_rate.epoch.env.logic": 0.5834862385321101, "success_rate.epoch.env.math": 0.9745889387144993, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9533980582524272, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5840719903855841, "success_rate.epoch.global": 0.849624060150376, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9906015037593985, "tokens_p.mean_in_band": 0.79296875, "tokens_rate.above_band": 0.9925373134328358, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007462686567164179 }, { "epoch": 1.4852061170212765, "grad_norm": 60.19511614034739, "learning_rate": 1.6643519015972743e-07, "loss": 0.2971, "step": 8935, "success_rate.epoch.env.abd": 0.5666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.5834862385321101, "success_rate.epoch.env.math": 0.9746268656716418, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.953443258971872, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.583720393326876, "success_rate.epoch.global": 0.8495722929271855, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9886431116389549, "tokens_p.mean_in_band": 0.6542632004310345, "tokens_rate.above_band": 0.8789144050104384, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12108559498956159 }, { "epoch": 1.4860372340425532, "grad_norm": 11.759328224689535, "learning_rate": 1.6641562313413601e-07, "loss": 0.2086, "step": 8940, "success_rate.epoch.env.abd": 0.5666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.5834862385321101, "success_rate.epoch.env.math": 0.9731743666169895, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9535333978702807, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.583596542403581, "success_rate.epoch.global": 0.8495833333333334, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9944672131147541, "tokens_p.mean_below_band": 8.451752364635468e-08, "tokens_p.mean_in_band": 0.6826923076923077, "tokens_rate.above_band": 0.953125, "tokens_rate.below_band": 0.00625, "tokens_rate.in_band": 0.040625 }, { "epoch": 1.4868683510638299, "grad_norm": 31.196475492201976, "learning_rate": 1.6639608021748553e-07, "loss": 0.2216, "step": 8945, "success_rate.epoch.env.abd": 0.5666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.5842490842490843, "success_rate.epoch.env.math": 0.9731743666169895, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9536082474226805, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5836726965189787, "success_rate.epoch.global": 0.8497711194340408, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9858606557377049, "tokens_p.mean_in_band": 0.7458333333333333, "tokens_rate.above_band": 0.8714285714285714, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12857142857142856 }, { "epoch": 1.4876994680851063, "grad_norm": 141.03161486347423, "learning_rate": 1.6637656144445982e-07, "loss": 0.2986, "step": 8950, "success_rate.epoch.env.abd": 0.5666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.5850091407678245, "success_rate.epoch.env.math": 0.9731743666169895, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9536977491961415, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.583749929090997, "success_rate.epoch.global": 0.8499896114689383, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947683109118086, "tokens_p.mean_in_band": 0.583984375, "tokens_rate.above_band": 0.9911111111111112, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008888888888888889 }, { "epoch": 1.488530585106383, "grad_norm": 93.43635198484678, "learning_rate": 1.6635706684969995e-07, "loss": 0.3848, "step": 8955, "success_rate.epoch.env.abd": 0.5666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.583941605839416, "success_rate.epoch.env.math": 0.9732540861812778, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9537126325940212, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5836614807304297, "success_rate.epoch.global": 0.8499065808594561, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.996875, "tokens_p.mean_in_band": 0.6641773897058824, "tokens_rate.above_band": 0.9549071618037135, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04509283819628647 }, { "epoch": 1.4893617021276595, "grad_norm": 172.266641715584, "learning_rate": 1.6633759646780405e-07, "loss": 0.2493, "step": 8960, "success_rate.epoch.env.abd": 0.5666666666666667, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4424778761061947, "success_rate.epoch.env.logic": 0.583941605839416, "success_rate.epoch.env.math": 0.973293768545994, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9537869062901155, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5836718403723216, "success_rate.epoch.global": 0.8500933029234916, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9901785714285715, "tokens_p.mean_in_band": 0.83203125, "tokens_rate.above_band": 0.9929078014184397, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0070921985815602835 }, { "epoch": 1.4901928191489362, "grad_norm": 284.3213629765886, "learning_rate": 1.663181503333273e-07, "loss": 0.4245, "step": 8965, "success_rate.epoch.env.abd": 0.5483870967741935, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.43859649122807015, "success_rate.epoch.env.logic": 0.583941605839416, "success_rate.epoch.env.math": 0.9733333333333334, "success_rate.epoch.env.sat": 0.105, "success_rate.epoch.env.science": 0.9538313562039115, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5816648458205522, "success_rate.epoch.global": 0.8498653965624353, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9939673157162726, "tokens_p.mean_below_band": 2.9193082203467685e-07, "tokens_p.mean_in_band": 0.3298333845208845, "tokens_rate.above_band": 0.8148232094288305, "tokens_rate.below_band": 0.0006799637352674524, "tokens_rate.in_band": 0.18449682683590207 }, { "epoch": 1.4910239361702127, "grad_norm": 36.14850784228584, "learning_rate": 1.6629872848078182e-07, "loss": 0.2204, "step": 8970, "success_rate.epoch.env.abd": 0.5483870967741935, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4434782608695652, "success_rate.epoch.env.logic": 0.583941605839416, "success_rate.epoch.env.math": 0.9733333333333334, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.9535851472471191, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5824910546390168, "success_rate.epoch.global": 0.8498759305210918, "success_rate.window.env.ded": 0.5, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947320680993581, "tokens_p.mean_below_band": 4.1574821807444096e-07, "tokens_p.mean_in_band": 0.5336565290178571, "tokens_rate.above_band": 0.8884205306223655, "tokens_rate.below_band": 0.0004959087527894868, "tokens_rate.in_band": 0.11108356062484503 }, { "epoch": 1.4918550531914894, "grad_norm": 69.67409498344419, "learning_rate": 1.662793309446366e-07, "loss": 0.339, "step": 8975, "success_rate.epoch.env.abd": 0.5483870967741935, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4482758620689655, "success_rate.epoch.env.logic": 0.583941605839416, "success_rate.epoch.env.math": 0.9733333333333334, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.9533098816757275, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5829021760597449, "success_rate.epoch.global": 0.849793388429752, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9943860619469026, "tokens_p.mean_below_band": 3.9301812648773193e-07, "tokens_p.mean_in_band": 0.8189290364583334, "tokens_rate.above_band": 0.9584393553859203, "tokens_rate.below_band": 0.0008481764206955047, "tokens_rate.in_band": 0.04071246819338423 }, { "epoch": 1.492686170212766, "grad_norm": 25.172341789178848, "learning_rate": 1.6625995775931753e-07, "loss": 0.4, "step": 8980, "success_rate.epoch.env.abd": 0.5483870967741935, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4482758620689655, "success_rate.epoch.env.logic": 0.583941605839416, "success_rate.epoch.env.math": 0.9734121122599705, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.9533397251518057, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5829120508236282, "success_rate.epoch.global": 0.8499174236168456, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994762569832403, "tokens_p.mean_in_band": 0.7009943181818182, "tokens_rate.above_band": 0.984869325997249, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015130674002751032 }, { "epoch": 1.4935172872340425, "grad_norm": 101.60215212174798, "learning_rate": 1.6624060895920731e-07, "loss": 0.158, "step": 8985, "success_rate.epoch.env.abd": 0.5483870967741935, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4482758620689655, "success_rate.epoch.env.logic": 0.583941605839416, "success_rate.epoch.env.math": 0.9734904270986745, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.9533844189016603, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5829232334225879, "success_rate.epoch.global": 0.850072179830893, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982289042821159, "tokens_p.mean_in_band": 0.7236689814814815, "tokens_rate.above_band": 0.9932449337002752, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006755066299724793 }, { "epoch": 1.4943484042553192, "grad_norm": 44.46585791465407, "learning_rate": 1.6622128457864528e-07, "loss": 0.2366, "step": 8990, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4482758620689655, "success_rate.epoch.env.logic": 0.5836363636363636, "success_rate.epoch.env.math": 0.9736070381231672, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.9533992977976381, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5841904289628812, "success_rate.epoch.global": 0.8500823723228995, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988344988344988, "tokens_p.mean_in_band": 0.6252170138888888, "tokens_rate.above_band": 0.9794520547945206, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02054794520547945 }, { "epoch": 1.4951795212765957, "grad_norm": 55.129395867056154, "learning_rate": 1.662019846519275e-07, "loss": 0.2492, "step": 8995, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.452991452991453, "success_rate.epoch.env.logic": 0.5836363636363636, "success_rate.epoch.env.math": 0.9736070381231672, "success_rate.epoch.env.sat": 0.10945273631840796, "success_rate.epoch.env.science": 0.9535031847133758, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5846285633118108, "success_rate.epoch.global": 0.850328947368421, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949245939675174, "tokens_p.mean_in_band": 0.61767578125, "tokens_rate.above_band": 0.9642058165548099, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.035794183445190156 }, { "epoch": 1.4960106382978724, "grad_norm": 121.23035664122499, "learning_rate": 1.661827092133066e-07, "loss": 0.3351, "step": 9000, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4491525423728814, "success_rate.epoch.env.logic": 0.5836363636363636, "success_rate.epoch.env.math": 0.9736070381231672, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9535475660197263, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5842343474443997, "success_rate.epoch.global": 0.8500718833436024, "success_rate.window.env.ded": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9883373205741627, "tokens_p.mean_in_band": 0.6793653350515464, "tokens_rate.above_band": 0.8660220994475138, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13397790055248618 }, { "epoch": 1.4968417553191489, "grad_norm": 52.31826487450914, "learning_rate": 1.6616345829699175e-07, "loss": 0.3332, "step": 9005, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.4, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4491525423728814, "success_rate.epoch.env.logic": 0.5851449275362319, "success_rate.epoch.env.math": 0.9736456808199122, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9536066094693358, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5843803701758745, "success_rate.epoch.global": 0.850287120590648, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971955128205128, "tokens_p.mean_below_band": 4.0605664253234863e-07, "tokens_p.mean_in_band": 0.15475751768867924, "tokens_rate.above_band": 0.7455197132616488, "tokens_rate.below_band": 0.0011947431302270011, "tokens_rate.in_band": 0.2532855436081243 }, { "epoch": 1.4976728723404256, "grad_norm": 65.10165148066402, "learning_rate": 1.6614423193714862e-07, "loss": 0.2413, "step": 9010, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4406779661016949, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.5858951175406871, "success_rate.epoch.env.math": 0.9736456808199122, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9536507936507936, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5943781124619077, "success_rate.epoch.global": 0.8505017407331559, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972230913642053, "tokens_p.mean_in_band": 0.43419989224137934, "tokens_rate.above_band": 0.9821757836508912, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01782421634910879 }, { "epoch": 1.4985039893617023, "grad_norm": 54.229177669455524, "learning_rate": 1.6612503016789928e-07, "loss": 0.4671, "step": 9015, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44537815126050423, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.5866425992779783, "success_rate.epoch.env.math": 0.9736456808199122, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9536948937519822, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013708705006223, "success_rate.epoch.global": 0.8506852116997341, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972718253968254, "tokens_p.mean_below_band": 4.172325134277344e-07, "tokens_p.mean_in_band": 0.609375, "tokens_rate.above_band": 0.9940828402366864, "tokens_rate.below_band": 0.0019723865877712033, "tokens_rate.in_band": 0.0039447731755424065 }, { "epoch": 1.4993351063829787, "grad_norm": 42.83299544093687, "learning_rate": 1.6610585302332222e-07, "loss": 0.1372, "step": 9020, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44537815126050423, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.5866425992779783, "success_rate.epoch.env.math": 0.9736456808199122, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.9537682077264091, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013287620571469, "success_rate.epoch.global": 0.8506639427987742, "success_rate.window.env.abd": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9922542735042735, "tokens_p.mean_below_band": 5.893525667488575e-10, "tokens_p.mean_in_band": 0.7458767361111112, "tokens_rate.above_band": 0.924901185770751, "tokens_rate.below_band": 0.003952569169960474, "tokens_rate.in_band": 0.07114624505928854 }, { "epoch": 1.5001662234042552, "grad_norm": 64.11192445922434, "learning_rate": 1.6608670053745211e-07, "loss": 0.2851, "step": 9025, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44537815126050423, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.5863309352517986, "success_rate.epoch.env.math": 0.9737226277372263, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.9535251343661081, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.60128532655995, "success_rate.epoch.global": 0.8505301794453507, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963908450704225, "tokens_p.mean_in_band": 0.5990032327586207, "tokens_rate.above_band": 0.9607577807848444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03924221921515562 }, { "epoch": 1.500997340425532, "grad_norm": 100.63425747748076, "learning_rate": 1.6606757274427995e-07, "loss": 0.3297, "step": 9030, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44537815126050423, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.5863309352517986, "success_rate.epoch.env.math": 0.9737609329446064, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.9532680770445217, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.601265440004113, "success_rate.epoch.global": 0.8504787125687513, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9931264172335601, "tokens_p.mean_in_band": 0.7089146205357143, "tokens_rate.above_band": 0.9402985074626866, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05970149253731343 }, { "epoch": 1.5018284574468086, "grad_norm": 33.79839899285133, "learning_rate": 1.6604846967775284e-07, "loss": 0.2661, "step": 9035, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44537815126050423, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4583333333333333, "success_rate.epoch.env.logic": 0.5870736086175943, "success_rate.epoch.env.math": 0.9738372093023255, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.9533270261747083, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013452489908132, "success_rate.epoch.global": 0.8506916192026037, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961705748865356, "tokens_p.mean_below_band": 6.693881005048752e-10, "tokens_p.mean_in_band": 0.7421875, "tokens_rate.above_band": 0.9836309523809523, "tokens_rate.below_band": 0.001488095238095238, "tokens_rate.in_band": 0.01488095238095238 }, { "epoch": 1.502659574468085, "grad_norm": 115.80812163572587, "learning_rate": 1.6602939137177414e-07, "loss": 0.2782, "step": 9040, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44537815126050423, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.5870736086175943, "success_rate.epoch.env.math": 0.9738372093023255, "success_rate.epoch.env.sat": 0.10837438423645321, "success_rate.epoch.env.science": 0.9534151715454832, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6017602243901599, "success_rate.epoch.global": 0.8509039203737558, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9934463562753036, "tokens_p.mean_in_band": 0.715625, "tokens_rate.above_band": 0.96484375, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03515625 }, { "epoch": 1.5034906914893615, "grad_norm": 35.25153601091879, "learning_rate": 1.6601033786020309e-07, "loss": 0.3294, "step": 9045, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44537815126050423, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.5857142857142857, "success_rate.epoch.env.math": 0.9738751814223512, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.9534591194968554, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.601595801679845, "success_rate.epoch.global": 0.8505374163455689, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976694065757818, "tokens_p.mean_in_band": 0.5175304878048781, "tokens_rate.above_band": 0.968167701863354, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03183229813664596 }, { "epoch": 1.5043218085106385, "grad_norm": 65.40337430793437, "learning_rate": 1.65991309176855e-07, "loss": 0.2708, "step": 9050, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44166666666666665, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.5857142857142857, "success_rate.epoch.env.math": 0.9739507959479016, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.9535175879396985, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6012705833502593, "success_rate.epoch.global": 0.850546780072904, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949392712550608, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.993963782696177, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006036217303822937 }, { "epoch": 1.505152925531915, "grad_norm": 21.08796151853228, "learning_rate": 1.6597230535550118e-07, "loss": 0.4217, "step": 9055, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.44166666666666665, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.5846702317290553, "success_rate.epoch.env.math": 0.9739507959479016, "success_rate.epoch.env.sat": 0.1073170731707317, "success_rate.epoch.env.science": 0.953590467231107, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6011344707340782, "success_rate.epoch.global": 0.8503538928210314, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9940131578947369, "tokens_p.mean_in_band": 0.7018229166666666, "tokens_rate.above_band": 0.9547738693467337, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04522613065326633 }, { "epoch": 1.5059840425531914, "grad_norm": 41.1613974825275, "learning_rate": 1.659533264298687e-07, "loss": 0.1876, "step": 9060, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4462809917355372, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.5846702317290553, "success_rate.epoch.env.math": 0.9739507959479016, "success_rate.epoch.env.sat": 0.1073170731707317, "success_rate.epoch.env.science": 0.9536921151439299, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6015631955505957, "success_rate.epoch.global": 0.8505955986270947, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9965175953079178, "tokens_p.mean_in_band": 0.7786458333333334, "tokens_rate.above_band": 0.9912790697674418, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00872093023255814 }, { "epoch": 1.506815159574468, "grad_norm": 31.719517489764847, "learning_rate": 1.6593437243364054e-07, "loss": 0.2026, "step": 9065, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4462809917355372, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.5854092526690391, "success_rate.epoch.env.math": 0.9739507959479016, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9537788881948782, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6015909080346916, "success_rate.epoch.global": 0.850634952630518, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9899475524475524, "tokens_p.mean_in_band": 0.7780172413793104, "tokens_rate.above_band": 0.9079365079365079, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09206349206349207 }, { "epoch": 1.5076462765957448, "grad_norm": 137.53736810330713, "learning_rate": 1.659154434004554e-07, "loss": 0.217, "step": 9070, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4462809917355372, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.45454545454545453, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.5865724381625441, "success_rate.epoch.env.math": 0.9739507959479016, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9538365564566438, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6017018947397162, "success_rate.epoch.global": 0.850674179915476, "success_rate.window.env.logic": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951338199513382, "tokens_p.mean_in_band": 0.6711956521739131, "tokens_rate.above_band": 0.9727810650887574, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027218934911242602 }, { "epoch": 1.5084773936170213, "grad_norm": 50.05467060219551, "learning_rate": 1.6589653936390763e-07, "loss": 0.2862, "step": 9075, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4462809917355372, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4628099173553719, "success_rate.epoch.env.logic": 0.5855379188712522, "success_rate.epoch.env.math": 0.9739884393063584, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9538797133063259, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5981716668340852, "success_rate.epoch.global": 0.8504522613065326, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9958722014925373, "tokens_p.mean_in_band": 0.6336096938775511, "tokens_rate.above_band": 0.9318497913769124, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06815020862308763 }, { "epoch": 1.5093085106382977, "grad_norm": 63.026998770336384, "learning_rate": 1.6587766035754733e-07, "loss": 0.4296, "step": 9080, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4426229508196721, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5855379188712522, "success_rate.epoch.env.math": 0.9739884393063584, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9536403235843186, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5974724891476652, "success_rate.epoch.global": 0.8500602167804094, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.2857142857142857, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9911995515695067, "tokens_p.mean_below_band": 6.07222318649292e-07, "tokens_p.mean_in_band": 0.5039415049638989, "tokens_rate.above_band": 0.8004307250538406, "tokens_rate.below_band": 0.0007178750897343862, "tokens_rate.in_band": 0.19885139985642497 }, { "epoch": 1.5101396276595744, "grad_norm": 38.18815544643185, "learning_rate": 1.6585880641488006e-07, "loss": 0.2506, "step": 9085, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4426229508196721, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5855379188712522, "success_rate.epoch.env.math": 0.9739884393063584, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9537267080745342, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5974803422831393, "success_rate.epoch.global": 0.8502405773857258, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9832589285714286, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9940828402366864, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005917159763313609 }, { "epoch": 1.5109707446808511, "grad_norm": 111.98814150925904, "learning_rate": 1.6583997756936697e-07, "loss": 0.4014, "step": 9090, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4426229508196721, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5845070422535211, "success_rate.epoch.env.math": 0.974025974025974, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9538270839789278, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5973991635564372, "success_rate.epoch.global": 0.850310186111667, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976035276073619, "tokens_p.mean_in_band": 0.6294642857142857, "tokens_rate.above_band": 0.9893778452200304, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010622154779969651 }, { "epoch": 1.5118018617021276, "grad_norm": 149.57921447423726, "learning_rate": 1.6582117385442466e-07, "loss": 0.2177, "step": 9095, "success_rate.epoch.env.abd": 0.5625, "success_rate.epoch.env.agentgym:alfworld": 0.4426229508196721, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5845070422535211, "success_rate.epoch.env.math": 0.974025974025974, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9535891089108911, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.597377529459343, "success_rate.epoch.global": 0.8502598960415834, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9942484662576687, "tokens_p.mean_below_band": 1.2514647096395493e-09, "tokens_p.mean_in_band": 0.7103365384615384, "tokens_rate.above_band": 0.978978978978979, "tokens_rate.below_band": 0.0015015015015015015, "tokens_rate.in_band": 0.01951951951951952 }, { "epoch": 1.5126329787234043, "grad_norm": 32.09828574613877, "learning_rate": 1.658023953034251e-07, "loss": 0.159, "step": 9100, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.44715447154471544, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5845070422535211, "success_rate.epoch.env.math": 0.974025974025974, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.953646477132262, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5962451144048848, "success_rate.epoch.global": 0.8502396166134185, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9865314769975787, "tokens_p.mean_in_band": 0.740234375, "tokens_rate.above_band": 0.8694736842105263, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13052631578947368 }, { "epoch": 1.513464095744681, "grad_norm": 19.48703555281662, "learning_rate": 1.657836419496956e-07, "loss": 0.2382, "step": 9105, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.44715447154471544, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5845070422535211, "success_rate.epoch.env.math": 0.9741007194244604, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9537322640345466, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5962597082504094, "success_rate.epoch.global": 0.8504784688995215, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967447916666666, "tokens_p.mean_in_band": 0.4375, "tokens_rate.above_band": 0.9970326409495549, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002967359050445104 }, { "epoch": 1.5142952127659575, "grad_norm": 33.742789360988446, "learning_rate": 1.6576491382651882e-07, "loss": 0.1538, "step": 9110, "success_rate.epoch.env.abd": 0.5454545454545454, "success_rate.epoch.env.agentgym:alfworld": 0.44715447154471544, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5852372583479789, "success_rate.epoch.env.math": 0.9741750358680057, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9537750385208013, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5963367361617057, "success_rate.epoch.global": 0.8506571087216248, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995375264270613, "tokens_p.mean_in_band": 0.72998046875, "tokens_rate.above_band": 0.9916142557651991, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008385744234800839 }, { "epoch": 1.515126329787234, "grad_norm": 112.96616817973512, "learning_rate": 1.6574621096713256e-07, "loss": 0.3221, "step": 9115, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.44715447154471544, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5852372583479789, "success_rate.epoch.env.math": 0.9741750358680057, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9538177339901478, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5975559797459389, "success_rate.epoch.global": 0.8507759649820931, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9642857142857143, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.98861646234676, "tokens_p.mean_in_band": 0.75, "tokens_rate.above_band": 0.964527027027027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03547297297297297 }, { "epoch": 1.5159574468085106, "grad_norm": 67.7413967926489, "learning_rate": 1.6572753340472977e-07, "loss": 0.266, "step": 9120, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.44715447154471544, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5852372583479789, "success_rate.epoch.env.math": 0.9742120343839542, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9538745387453874, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5975645073160469, "success_rate.epoch.global": 0.8509242695289206, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9902137367915466, "tokens_p.mean_in_band": 0.48442055393586003, "tokens_rate.above_band": 0.7521676300578035, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.24783236994219654 }, { "epoch": 1.5167885638297873, "grad_norm": 56.2153090918143, "learning_rate": 1.6570888117245864e-07, "loss": 0.3855, "step": 9125, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5842105263157895, "success_rate.epoch.env.math": 0.9742857142857143, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9539028887523049, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.597885755457645, "success_rate.epoch.global": 0.8509033154655549, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9980926188786373, "tokens_p.mean_in_band": 0.6264204545454546, "tokens_rate.above_band": 0.9846261355695318, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015373864430468204 }, { "epoch": 1.5176196808510638, "grad_norm": 106.0890985976148, "learning_rate": 1.6569025430342223e-07, "loss": 0.1821, "step": 9130, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5831873905429071, "success_rate.epoch.env.math": 0.9742857142857143, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9539453484801965, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5977966030899186, "success_rate.epoch.global": 0.8508232493552866, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967866323907455, "tokens_p.mean_in_band": 0.7315244932432432, "tokens_rate.above_band": 0.9546012269938651, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04539877300613497 }, { "epoch": 1.5184507978723403, "grad_norm": 134.16236693681458, "learning_rate": 1.656716528306787e-07, "loss": 0.2763, "step": 9135, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5831873905429071, "success_rate.epoch.env.math": 0.9743223965763196, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9540300337113086, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5978076365009838, "success_rate.epoch.global": 0.8510301109350238, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976580796252927, "tokens_p.mean_in_band": 0.5834173387096774, "tokens_rate.above_band": 0.9649717514124294, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03502824858757062 }, { "epoch": 1.519281914893617, "grad_norm": 61.672083929726746, "learning_rate": 1.6565307678724115e-07, "loss": 0.2687, "step": 9140, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5831873905429071, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9540863177226814, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5978160784822589, "success_rate.epoch.global": 0.8511775183059569, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9928909952606635, "tokens_p.mean_in_band": 0.46875, "tokens_rate.above_band": 0.9952830188679245, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0047169811320754715 }, { "epoch": 1.5201130319148937, "grad_norm": 44.872902430958916, "learning_rate": 1.6563452620607744e-07, "loss": 0.2294, "step": 9145, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5828970331588132, "success_rate.epoch.env.math": 0.9743589743589743, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9541424640782635, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5977947865705762, "success_rate.epoch.global": 0.8511563550108717, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9999075443786982, "tokens_p.mean_in_band": 0.5841346153846154, "tokens_rate.above_band": 0.9811320754716981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018867924528301886 }, { "epoch": 1.5209441489361701, "grad_norm": 9.310281083891633, "learning_rate": 1.6561600112011033e-07, "loss": 0.2573, "step": 9150, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5836236933797909, "success_rate.epoch.env.math": 0.9743954480796586, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9539212694537686, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5978440537812276, "success_rate.epoch.global": 0.8511940003947108, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925426136363636, "tokens_p.mean_in_band": 0.486328125, "tokens_rate.above_band": 0.9887640449438202, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011235955056179775 }, { "epoch": 1.5217752659574468, "grad_norm": 31.75486191419853, "learning_rate": 1.655975015622173e-07, "loss": 0.277, "step": 9155, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5836236933797909, "success_rate.epoch.env.math": 0.9744680851063829, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9539914686166971, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5978570388893779, "success_rate.epoch.global": 0.8513992905005913, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9876838235294118, "tokens_p.mean_in_band": 0.826171875, "tokens_rate.above_band": 0.9770114942528736, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022988505747126436 }, { "epoch": 1.5226063829787235, "grad_norm": 67.05705649845474, "learning_rate": 1.6557902756523056e-07, "loss": 0.1623, "step": 9160, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5836236933797909, "success_rate.epoch.env.math": 0.9745042492917847, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9540334855403348, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5978641462629269, "success_rate.epoch.global": 0.8515163450177236, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9951652601969058, "tokens_p.mean_in_band": 0.746875, "tokens_rate.above_band": 0.9930167597765364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006983240223463687 }, { "epoch": 1.5234375, "grad_norm": 104.59529823614973, "learning_rate": 1.6556057916193691e-07, "loss": 0.2262, "step": 9165, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5843478260869566, "success_rate.epoch.env.math": 0.9745403111739745, "success_rate.epoch.env.sat": 0.10628019323671498, "success_rate.epoch.env.science": 0.9540893888719976, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5978914348677341, "success_rate.epoch.global": 0.8515240904621436, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9925699300699301, "tokens_p.mean_in_band": 0.7215909090909091, "tokens_rate.above_band": 0.9285714285714286, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07142857142857142 }, { "epoch": 1.5242686170212765, "grad_norm": 223.77072129465301, "learning_rate": 1.6554215638507775e-07, "loss": 0.4399, "step": 9170, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.448, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.58578856152513, "success_rate.epoch.env.math": 0.9745762711864406, "success_rate.epoch.env.sat": 0.10628019323671498, "success_rate.epoch.env.science": 0.9541451563923474, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5977023039354781, "success_rate.epoch.global": 0.8515609660318084, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9969117647058824, "tokens_p.mean_in_band": 0.6912977430555556, "tokens_rate.above_band": 0.9593679458239278, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040632054176072234 }, { "epoch": 1.5250997340425532, "grad_norm": 48.50251024765517, "learning_rate": 1.65523759267349e-07, "loss": 0.1936, "step": 9175, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.448, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.58578856152513, "success_rate.epoch.env.math": 0.9745762711864406, "success_rate.epoch.env.sat": 0.10628019323671498, "success_rate.epoch.env.science": 0.9542285541073052, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5977098855459287, "success_rate.epoch.global": 0.8517356344381252, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99406400550585, "tokens_p.mean_in_band": 0.744194380733945, "tokens_rate.above_band": 0.9302176696542894, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06978233034571063 }, { "epoch": 1.5259308510638299, "grad_norm": 114.72551124621418, "learning_rate": 1.6550538784140102e-07, "loss": 0.2429, "step": 9180, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.448, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5865051903114187, "success_rate.epoch.env.math": 0.9746121297602257, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9542701393095094, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5976896162023196, "success_rate.epoch.global": 0.8515471993732864, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9935453400503779, "tokens_p.mean_in_band": 0.25830639367816094, "tokens_rate.above_band": 0.5328859060402684, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.4671140939597315 }, { "epoch": 1.5267619680851063, "grad_norm": 46.47264155746653, "learning_rate": 1.6548704213983863e-07, "loss": 0.3098, "step": 9185, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.448, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5854922279792746, "success_rate.epoch.env.math": 0.9746835443037974, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9543254688445251, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5976090509065417, "success_rate.epoch.global": 0.8515548601603755, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.6058799342105263, "tokens_rate.above_band": 0.9722627737226277, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027737226277372264 }, { "epoch": 1.527593085106383, "grad_norm": 56.0322737807118, "learning_rate": 1.654687221952209e-07, "loss": 0.258, "step": 9190, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.448, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5862068965517241, "success_rate.epoch.env.math": 0.9747545582047685, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.954380664652568, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5976854943866748, "success_rate.epoch.global": 0.8517578125, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953703703703703, "tokens_p.mean_below_band": 8.003553375601768e-11, "tokens_rate.above_band": 0.9953917050691244, "tokens_rate.below_band": 0.004608294930875576, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5284242021276597, "grad_norm": 85.32311690231185, "learning_rate": 1.6545042804006128e-07, "loss": 0.3552, "step": 9195, "success_rate.epoch.env.abd": 0.5588235294117647, "success_rate.epoch.env.agentgym:alfworld": 0.448, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.584192439862543, "success_rate.epoch.env.math": 0.9747545582047685, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9544219740416541, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5975061173593934, "success_rate.epoch.global": 0.8515121951219512, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4666666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9958076923076923, "tokens_p.mean_in_band": 0.6093532986111111, "tokens_rate.above_band": 0.9475218658892128, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.052478134110787174 }, { "epoch": 1.5292553191489362, "grad_norm": 23.578310643582856, "learning_rate": 1.654321597068274e-07, "loss": 0.3208, "step": 9200, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.448, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5831903945111492, "success_rate.epoch.env.math": 0.9747899159663865, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9544769369912571, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985691462118147, "success_rate.epoch.global": 0.8515198752922837, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9975573491928632, "tokens_p.mean_in_band": 0.58125, "tokens_rate.above_band": 0.9874161073825504, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012583892617449664 }, { "epoch": 1.5300864361702127, "grad_norm": 219.26967171274, "learning_rate": 1.6541391722794108e-07, "loss": 0.3675, "step": 9205, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.448, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.582905982905983, "success_rate.epoch.env.math": 0.9747899159663865, "success_rate.epoch.env.sat": 0.10476190476190476, "success_rate.epoch.env.science": 0.9545180722891566, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985014617172603, "success_rate.epoch.global": 0.8513040093421564, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982887874837028, "tokens_p.mean_in_band": 0.5896990740740741, "tokens_rate.above_band": 0.9659949622166247, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03400503778337532 }, { "epoch": 1.5309175531914894, "grad_norm": 57.81549560444379, "learning_rate": 1.6539570063577828e-07, "loss": 0.2611, "step": 9210, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.448, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5826235093696763, "success_rate.epoch.env.math": 0.9747899159663865, "success_rate.epoch.env.sat": 0.10426540284360189, "success_rate.epoch.env.science": 0.9545864661654135, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5984368633919556, "success_rate.epoch.global": 0.8511465215701516, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972885032537961, "tokens_p.mean_in_band": 0.5329733455882353, "tokens_rate.above_band": 0.9760056457304164, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02399435426958363 }, { "epoch": 1.531748670212766, "grad_norm": 38.432345147878934, "learning_rate": 1.6537750996266894e-07, "loss": 0.3295, "step": 9215, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5833333333333334, "success_rate.epoch.env.math": 0.9748603351955307, "success_rate.epoch.env.sat": 0.10426540284360189, "success_rate.epoch.env.science": 0.9546137661556958, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5989085448068678, "success_rate.epoch.global": 0.8513198757763976, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989722898903776, "tokens_p.mean_in_band": 0.5929581925675675, "tokens_rate.above_band": 0.9779630732578916, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022036926742108397 }, { "epoch": 1.5325797872340425, "grad_norm": 55.218612249185014, "learning_rate": 1.6535934524089704e-07, "loss": 0.1674, "step": 9220, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5830508474576271, "success_rate.epoch.env.math": 0.9749303621169917, "success_rate.epoch.env.sat": 0.10426540284360189, "success_rate.epoch.env.science": 0.9546546546546546, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5988929474927509, "success_rate.epoch.global": 0.851327776700911, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998433147632312, "tokens_p.mean_in_band": 0.6921164772727273, "tokens_rate.above_band": 0.9849108367626886, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015089163237311385 }, { "epoch": 1.533410904255319, "grad_norm": 157.58913675966403, "learning_rate": 1.6534120650270059e-07, "loss": 0.296, "step": 9225, "success_rate.epoch.env.abd": 0.5714285714285714, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45901639344262296, "success_rate.epoch.env.logic": 0.5827702702702703, "success_rate.epoch.env.math": 0.9749652294853964, "success_rate.epoch.env.sat": 0.10377358490566038, "success_rate.epoch.env.science": 0.9547226386806597, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.598832079880852, "success_rate.epoch.global": 0.8511996904024768, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9963576158940397, "tokens_p.mean_in_band": 0.60458984375, "tokens_rate.above_band": 0.949685534591195, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050314465408805034 }, { "epoch": 1.5342420212765957, "grad_norm": 156.4276679582101, "learning_rate": 1.6532309378027132e-07, "loss": 0.3428, "step": 9230, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45528455284552843, "success_rate.epoch.env.logic": 0.5827702702702703, "success_rate.epoch.env.math": 0.9749652294853964, "success_rate.epoch.env.sat": 0.10377358490566038, "success_rate.epoch.env.science": 0.9547904191616766, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5970559820636616, "success_rate.epoch.global": 0.8510144927536232, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9961231203007519, "tokens_p.mean_in_band": 0.27459395559210525, "tokens_rate.above_band": 0.5833333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.4166666666666667 }, { "epoch": 1.5350731382978724, "grad_norm": 92.34474294267994, "learning_rate": 1.6530500710575493e-07, "loss": 0.2839, "step": 9235, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45528455284552843, "success_rate.epoch.env.logic": 0.5824915824915825, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.10377358490566038, "success_rate.epoch.env.science": 0.9548174745661281, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5970362673491496, "success_rate.epoch.global": 0.850965250965251, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.993720718375846, "tokens_p.mean_in_band": 0.5350811298076923, "tokens_rate.above_band": 0.8602776533811016, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13972234661889835 }, { "epoch": 1.5359042553191489, "grad_norm": 145.6546261255225, "learning_rate": 1.652869465112508e-07, "loss": 0.2209, "step": 9240, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45528455284552843, "success_rate.epoch.env.logic": 0.5824915824915825, "success_rate.epoch.env.math": 0.9750692520775623, "success_rate.epoch.env.sat": 0.10377358490566038, "success_rate.epoch.env.science": 0.9548984468339307, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5970499241078191, "success_rate.epoch.global": 0.851195065535852, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9894396551724138, "tokens_p.mean_in_band": 0.865234375, "tokens_rate.above_band": 0.9863945578231292, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013605442176870748 }, { "epoch": 1.5367353723404256, "grad_norm": 48.84491229255858, "learning_rate": 1.652689120288121e-07, "loss": 0.2878, "step": 9245, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45528455284552843, "success_rate.epoch.env.logic": 0.5831932773109244, "success_rate.epoch.env.math": 0.9751381215469613, "success_rate.epoch.env.sat": 0.10328638497652583, "success_rate.epoch.env.science": 0.9546404058490003, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5970522262327896, "success_rate.epoch.global": 0.8510105871029836, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.575, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9992143146796432, "tokens_p.mean_in_band": 0.6204202586206896, "tokens_rate.above_band": 0.9770206022187005, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022979397781299524 }, { "epoch": 1.5375664893617023, "grad_norm": 10.066583449628155, "learning_rate": 1.6525090369044559e-07, "loss": 0.1641, "step": 9250, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45528455284552843, "success_rate.epoch.env.logic": 0.5831932773109244, "success_rate.epoch.env.math": 0.9751724137931035, "success_rate.epoch.env.sat": 0.10328638497652583, "success_rate.epoch.env.science": 0.9547079856972587, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5970614873322806, "success_rate.epoch.global": 0.8511824649105941, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99609375, "tokens_p.mean_in_band": 0.5948988970588235, "tokens_rate.above_band": 0.974124809741248, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0258751902587519 }, { "epoch": 1.5383976063829787, "grad_norm": 177.5036342591982, "learning_rate": 1.6523292152811166e-07, "loss": 0.2806, "step": 9255, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45528455284552843, "success_rate.epoch.env.logic": 0.5838926174496645, "success_rate.epoch.env.math": 0.9752066115702479, "success_rate.epoch.env.sat": 0.102803738317757, "success_rate.epoch.env.science": 0.95473496128648, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5970867479546749, "success_rate.epoch.global": 0.8511333077218594, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9954461457233369, "tokens_p.mean_in_band": 0.6564360119047619, "tokens_rate.above_band": 0.9575328614762386, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042467138523761376 }, { "epoch": 1.5392287234042552, "grad_norm": 64.04737764050743, "learning_rate": 1.6521496557372423e-07, "loss": 0.3478, "step": 9260, "success_rate.epoch.env.abd": 0.5555555555555556, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4596774193548387, "success_rate.epoch.env.logic": 0.5845896147403685, "success_rate.epoch.env.math": 0.9752066115702479, "success_rate.epoch.env.sat": 0.10232558139534884, "success_rate.epoch.env.science": 0.9548022598870056, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5975121120890505, "success_rate.epoch.global": 0.8511699271192942, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976615646258503, "tokens_p.mean_in_band": 0.6053385416666667, "tokens_rate.above_band": 0.9607843137254902, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0392156862745098 }, { "epoch": 1.540059840425532, "grad_norm": 186.3566727121957, "learning_rate": 1.6519703585915075e-07, "loss": 0.3361, "step": 9265, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4596774193548387, "success_rate.epoch.env.logic": 0.5845896147403685, "success_rate.epoch.env.math": 0.9753086419753086, "success_rate.epoch.env.sat": 0.10232558139534884, "success_rate.epoch.env.science": 0.9545589545589546, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985912700062342, "success_rate.epoch.global": 0.8512064343163539, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9903846153846154, "tokens_p.mean_below_band": 1.6079866327345371e-09, "tokens_rate.above_band": 0.9936305732484076, "tokens_rate.below_band": 0.006369426751592357, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5408909574468086, "grad_norm": 492.8450407687255, "learning_rate": 1.6517913241621206e-07, "loss": 0.3513, "step": 9270, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4596774193548387, "success_rate.epoch.env.logic": 0.5843071786310517, "success_rate.epoch.env.math": 0.9753761969904241, "success_rate.epoch.env.sat": 0.10185185185185185, "success_rate.epoch.env.science": 0.9545994065281899, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985323464909194, "success_rate.epoch.global": 0.8510516252390057, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998309748427673, "tokens_p.mean_in_band": 0.5865478515625, "tokens_rate.above_band": 0.9613059250302297, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03869407496977025 }, { "epoch": 1.541722074468085, "grad_norm": 60.77314750814437, "learning_rate": 1.651612552766824e-07, "loss": 0.2766, "step": 9275, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4596774193548387, "success_rate.epoch.env.logic": 0.5843071786310517, "success_rate.epoch.env.math": 0.9754098360655737, "success_rate.epoch.env.sat": 0.10185185185185185, "success_rate.epoch.env.science": 0.9546532305868406, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985402976849012, "success_rate.epoch.global": 0.8511938872970392, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9884370701513068, "tokens_p.mean_in_band": 0.6088337725903614, "tokens_rate.above_band": 0.8141097424412094, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1858902575587906 }, { "epoch": 1.5425531914893615, "grad_norm": 45.810983873685004, "learning_rate": 1.6514340447228926e-07, "loss": 0.2671, "step": 9280, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4523809523809524, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4596774193548387, "success_rate.epoch.env.logic": 0.5843071786310517, "success_rate.epoch.env.math": 0.9754098360655737, "success_rate.epoch.env.sat": 0.10185185185185185, "success_rate.epoch.env.science": 0.9544108940201302, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985182670879277, "success_rate.epoch.global": 0.8511166253101737, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9810344827586207, "tokens_p.mean_below_band": 7.566995918750763e-10, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9863945578231292, "tokens_rate.below_band": 0.006802721088435374, "tokens_rate.in_band": 0.006802721088435374 }, { "epoch": 1.5433843085106385, "grad_norm": 77.89024446883836, "learning_rate": 1.651255800347135e-07, "loss": 0.2916, "step": 9285, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4566929133858268, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4596774193548387, "success_rate.epoch.env.logic": 0.5833333333333334, "success_rate.epoch.env.math": 0.975443383356071, "success_rate.epoch.env.sat": 0.10185185185185185, "success_rate.epoch.env.science": 0.9544648137196925, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5988296836967654, "success_rate.epoch.global": 0.8511246664125047, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9985508601346298, "tokens_p.mean_in_band": 0.6669170673076923, "tokens_rate.above_band": 0.9903703703703703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00962962962962963 }, { "epoch": 1.544215425531915, "grad_norm": 73.94830135978621, "learning_rate": 1.6510778199558913e-07, "loss": 0.3128, "step": 9290, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4566929133858268, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.456, "success_rate.epoch.env.logic": 0.5830564784053156, "success_rate.epoch.env.math": 0.975443383356071, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9545320342485976, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5984336457761708, "success_rate.epoch.global": 0.8508087535680304, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.375, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.994963171577123, "tokens_p.mean_in_band": 0.708, "tokens_rate.above_band": 0.9022673964034402, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09773260359655982 }, { "epoch": 1.5450465425531914, "grad_norm": 36.945497337701575, "learning_rate": 1.6509001038650327e-07, "loss": 0.2269, "step": 9295, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4566929133858268, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.456, "success_rate.epoch.env.logic": 0.582089552238806, "success_rate.epoch.env.math": 0.975443383356071, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9546258102533883, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5983542684887418, "success_rate.epoch.global": 0.850845525365761, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987830996884736, "tokens_p.mean_in_band": 0.7193509615384616, "tokens_rate.above_band": 0.9801526717557252, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01984732824427481 }, { "epoch": 1.545877659574468, "grad_norm": 42.21455029417714, "learning_rate": 1.650722652389962e-07, "loss": 0.2648, "step": 9300, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4566929133858268, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.456, "success_rate.epoch.env.logic": 0.582089552238806, "success_rate.epoch.env.math": 0.975443383356071, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9546925566343042, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5983603363415523, "success_rate.epoch.global": 0.8509870918754746, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9935722976434426, "tokens_p.mean_in_band": 0.5723967535335689, "tokens_rate.above_band": 0.8733780760626398, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1266219239373602 }, { "epoch": 1.5467087765957448, "grad_norm": 86.4142358346517, "learning_rate": 1.6505454658456117e-07, "loss": 0.2932, "step": 9305, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4609375, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.456, "success_rate.epoch.env.logic": 0.5801652892561984, "success_rate.epoch.env.math": 0.9754768392370572, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9547192002352249, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985767384427771, "success_rate.epoch.global": 0.8507773985589685, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9999546114742193, "tokens_p.mean_in_band": 0.52734375, "tokens_rate.above_band": 0.9835714285714285, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016428571428571428 }, { "epoch": 1.5475398936170213, "grad_norm": 97.88257115794653, "learning_rate": 1.650368544546445e-07, "loss": 0.2481, "step": 9310, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4573643410852713, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.456, "success_rate.epoch.env.logic": 0.5801652892561984, "success_rate.epoch.env.math": 0.9755766621438263, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9547458125183661, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5982633999223391, "success_rate.epoch.global": 0.8507575757575757, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9973347547974414, "tokens_p.mean_in_band": 0.6829044117647058, "tokens_rate.above_band": 0.9650205761316872, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03497942386831276 }, { "epoch": 1.5483710106382977, "grad_norm": 579.3161108939388, "learning_rate": 1.6501918888064538e-07, "loss": 0.3687, "step": 9315, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4573643410852713, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.456, "success_rate.epoch.env.logic": 0.5792079207920792, "success_rate.epoch.env.math": 0.975609756097561, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9547989433519225, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5981842050426274, "success_rate.epoch.global": 0.8507377979568672, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993786023054755, "tokens_p.mean_in_band": 0.5829741379310345, "tokens_rate.above_band": 0.9598893499308437, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.040110650069156296 }, { "epoch": 1.5492021276595744, "grad_norm": 62.92559590765751, "learning_rate": 1.6500154989391587e-07, "loss": 0.1669, "step": 9320, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4573643410852713, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4523809523809524, "success_rate.epoch.env.logic": 0.5792079207920792, "success_rate.epoch.env.math": 0.9756756756756757, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9548387096774194, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5978648085230422, "success_rate.epoch.global": 0.8507180650037793, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9895728008088979, "tokens_p.mean_in_band": 0.6317204301075269, "tokens_rate.above_band": 0.8417021276595744, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15829787234042553 }, { "epoch": 1.5500332446808511, "grad_norm": 232.18577267766102, "learning_rate": 1.6498393752576081e-07, "loss": 0.4462, "step": 9325, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4573643410852713, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5792079207920792, "success_rate.epoch.env.math": 0.9756756756756757, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9548916227299356, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5982616152555323, "success_rate.epoch.global": 0.850858976779309, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9914579796264855, "tokens_p.mean_in_band": 0.6620065789473685, "tokens_rate.above_band": 0.96875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03125 }, { "epoch": 1.5508643617021276, "grad_norm": 42.94920339151284, "learning_rate": 1.649663518074379e-07, "loss": 0.3647, "step": 9330, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.4573643410852713, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5799011532125206, "success_rate.epoch.env.math": 0.9757412398921833, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9549575899385785, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5983365937869499, "success_rate.epoch.global": 0.8510838831291234, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9958791208791209, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5516954787234043, "grad_norm": 37.14909379555877, "learning_rate": 1.6494879277015744e-07, "loss": 0.2881, "step": 9335, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.45384615384615384, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.580327868852459, "success_rate.epoch.env.math": 0.9757412398921833, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9550102249488752, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5980603359152333, "success_rate.epoch.global": 0.8509316770186336, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9985271516393442, "tokens_p.mean_in_band": 0.5784040178571429, "tokens_rate.above_band": 0.9858585858585859, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014141414141414142 }, { "epoch": 1.552526595744681, "grad_norm": 35.48011641662896, "learning_rate": 1.6493126044508246e-07, "loss": 0.3229, "step": 9340, "success_rate.epoch.env.abd": 0.5675675675675675, "success_rate.epoch.env.agentgym:alfworld": 0.45384615384615384, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.580327868852459, "success_rate.epoch.env.math": 0.9757412398921833, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.954784130688448, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.598039781891558, "success_rate.epoch.global": 0.8508837908988341, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9931139122315593, "tokens_p.mean_below_band": 2.9976945370435715e-09, "tokens_p.mean_in_band": 0.7180989583333334, "tokens_rate.above_band": 0.946113074204947, "tokens_rate.below_band": 0.0008833922261484099, "tokens_rate.in_band": 0.053003533568904596 }, { "epoch": 1.5533577127659575, "grad_norm": 46.997020652647606, "learning_rate": 1.649137548633285e-07, "loss": 0.2568, "step": 9345, "success_rate.epoch.env.abd": 0.5526315789473685, "success_rate.epoch.env.agentgym:alfworld": 0.4580152671755725, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5810147299509002, "success_rate.epoch.env.math": 0.9757738896366084, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9548499854354792, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5971323719187505, "success_rate.epoch.global": 0.8509480007508917, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955163043478261, "tokens_p.mean_below_band": 8.866190910339355e-07, "tokens_p.mean_in_band": 0.12465504335001788, "tokens_rate.above_band": 0.3303015797032073, "tokens_rate.below_band": 0.00023934897079942556, "tokens_rate.in_band": 0.6694590713259932 }, { "epoch": 1.554188829787234, "grad_norm": 331.0569236958991, "learning_rate": 1.648962760559637e-07, "loss": 0.1984, "step": 9350, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.4580152671755725, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5816993464052288, "success_rate.epoch.env.math": 0.9758389261744966, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9548631333721608, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5982445342900318, "success_rate.epoch.global": 0.8510877719429858, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9986293859649122, "tokens_p.mean_in_band": 0.657421875, "tokens_rate.above_band": 0.9876237623762376, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012376237623762377 }, { "epoch": 1.5550199468085106, "grad_norm": 64.31501432388835, "learning_rate": 1.6487882405400865e-07, "loss": 0.2072, "step": 9355, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.4580152671755725, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5816993464052288, "success_rate.epoch.env.math": 0.9758713136729222, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9549287583599884, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5982534445160548, "success_rate.epoch.global": 0.8512551517422255, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9924812030075187, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9925373134328358, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007462686567164179 }, { "epoch": 1.5558510638297873, "grad_norm": 41.48157066632761, "learning_rate": 1.6486139888843637e-07, "loss": 0.2507, "step": 9360, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.4580152671755725, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5816993464052288, "success_rate.epoch.env.math": 0.9759358288770054, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9550333623440673, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5982688189877059, "success_rate.epoch.global": 0.8515332834704562, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9939393939393939, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9939759036144579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006024096385542169 }, { "epoch": 1.5566821808510638, "grad_norm": 67.48113133943718, "learning_rate": 1.648440005901723e-07, "loss": 0.1964, "step": 9365, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.4580152671755725, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5816993464052288, "success_rate.epoch.env.math": 0.976, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9550594375181212, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5982770231965285, "success_rate.epoch.global": 0.851644245142003, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9982184129429892, "tokens_p.mean_in_band": 0.6770833333333334, "tokens_rate.above_band": 0.9818456883509834, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018154311649016642 }, { "epoch": 1.5575132978723403, "grad_norm": 20.292430926730685, "learning_rate": 1.648266291900941e-07, "loss": 0.262, "step": 9370, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.4580152671755725, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5807504078303426, "success_rate.epoch.env.math": 0.9760319573901465, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9550984936268829, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5981972118259851, "success_rate.epoch.global": 0.851596042561135, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998559907834101, "tokens_p.mean_in_band": 0.5848214285714286, "tokens_rate.above_band": 0.9893617021276596, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010638297872340425 }, { "epoch": 1.558344414893617, "grad_norm": 40.61589001796227, "learning_rate": 1.6480928471903182e-07, "loss": 0.2794, "step": 9375, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.4580152671755725, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5804878048780487, "success_rate.epoch.env.math": 0.9760319573901465, "success_rate.epoch.env.sat": 0.10138248847926268, "success_rate.epoch.env.science": 0.9551244933410539, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5981757024407012, "success_rate.epoch.global": 0.8515202387614251, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958541927409261, "tokens_p.mean_in_band": 0.6008831521739131, "tokens_rate.above_band": 0.9455621301775148, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.054437869822485205 }, { "epoch": 1.5591755319148937, "grad_norm": 122.61954597959252, "learning_rate": 1.6479196720776757e-07, "loss": 0.1888, "step": 9380, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.4580152671755725, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5804878048780487, "success_rate.epoch.env.math": 0.9760956175298805, "success_rate.epoch.env.sat": 0.10091743119266056, "success_rate.epoch.env.science": 0.9551764025448236, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5981439308095104, "success_rate.epoch.global": 0.8515275707898659, "success_rate.window.env.abd": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9938134517766497, "tokens_p.mean_below_band": 2.1736923372372985e-10, "tokens_p.mean_in_band": 0.5928308823529411, "tokens_rate.above_band": 0.9162790697674419, "tokens_rate.below_band": 0.004651162790697674, "tokens_rate.in_band": 0.07906976744186046 }, { "epoch": 1.5600066489361701, "grad_norm": 21.818336979738863, "learning_rate": 1.6477467668703576e-07, "loss": 0.3972, "step": 9385, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5804878048780487, "success_rate.epoch.env.math": 0.976158940397351, "success_rate.epoch.env.sat": 0.1004566210045662, "success_rate.epoch.env.science": 0.9552152557064433, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5977958901923175, "success_rate.epoch.global": 0.8513488372093023, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9972987959442332, "tokens_p.mean_in_band": 0.47944630872483224, "tokens_rate.above_band": 0.9636052760136785, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.036394723986321445 }, { "epoch": 1.5608377659574468, "grad_norm": 206.48780514015917, "learning_rate": 1.6475741318752278e-07, "loss": 0.2428, "step": 9390, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5795454545454546, "success_rate.epoch.env.math": 0.9762219286657859, "success_rate.epoch.env.sat": 0.1004566210045662, "success_rate.epoch.env.science": 0.9549783549783549, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5976944117566586, "success_rate.epoch.global": 0.8511705685618729, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9981694560669456, "tokens_p.mean_in_band": 0.57625, "tokens_rate.above_band": 0.9348109517601043, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0651890482398957 }, { "epoch": 1.5616688829787235, "grad_norm": 51.58350460559685, "learning_rate": 1.6474017673986714e-07, "loss": 0.2876, "step": 9395, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.580906148867314, "success_rate.epoch.env.math": 0.9762219286657859, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.955043227665706, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5977824977570808, "success_rate.epoch.global": 0.8512059369202226, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9956487341772152, "tokens_p.mean_in_band": 0.6483347039473685, "tokens_rate.above_band": 0.9614604462474645, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.038539553752535496 }, { "epoch": 1.5625, "grad_norm": 261.705234518782, "learning_rate": 1.6472296737465927e-07, "loss": 0.2942, "step": 9400, "success_rate.epoch.env.abd": 0.5641025641025641, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.580906148867314, "success_rate.epoch.env.math": 0.9762532981530343, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9550949913644214, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5977900553194412, "success_rate.epoch.global": 0.8513438368860056, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921445741758241, "tokens_p.mean_in_band": 0.6350520833333333, "tokens_rate.above_band": 0.9066002490660025, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09339975093399751 }, { "epoch": 1.5633311170212765, "grad_norm": 56.94746983989885, "learning_rate": 1.647057851224416e-07, "loss": 0.3251, "step": 9405, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5799676898222941, "success_rate.epoch.env.math": 0.9762532981530343, "success_rate.epoch.env.sat": 0.09954751131221719, "success_rate.epoch.env.science": 0.9551208285385501, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5986566303502378, "success_rate.epoch.global": 0.8511111111111112, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9918650793650794, "tokens_p.mean_in_band": 0.6862909226190477, "tokens_rate.above_band": 0.9183673469387755, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08163265306122448 }, { "epoch": 1.5641622340425532, "grad_norm": 116.8976036136796, "learning_rate": 1.6468863001370844e-07, "loss": 0.2649, "step": 9410, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5787781350482315, "success_rate.epoch.env.math": 0.9763157894736842, "success_rate.epoch.env.sat": 0.09954751131221719, "success_rate.epoch.env.science": 0.9551466359976998, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985565161689411, "success_rate.epoch.global": 0.8509339744775292, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986178785607196, "tokens_p.mean_in_band": 0.5684523809523809, "tokens_rate.above_band": 0.9845018450184502, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015498154981549815 }, { "epoch": 1.5649933510638299, "grad_norm": 46.93117213931343, "learning_rate": 1.646715020789059e-07, "loss": 0.2373, "step": 9415, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5787781350482315, "success_rate.epoch.env.math": 0.9763469119579501, "success_rate.epoch.env.sat": 0.09954751131221719, "success_rate.epoch.env.science": 0.9552110249784669, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985651990293986, "success_rate.epoch.global": 0.8510992056161094, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946875, "tokens_p.mean_in_band": 0.750244140625, "tokens_rate.above_band": 0.9259259259259259, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07407407407407407 }, { "epoch": 1.5658244680851063, "grad_norm": 115.72460639209886, "learning_rate": 1.6465440134843186e-07, "loss": 0.1813, "step": 9420, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4566929133858268, "success_rate.epoch.env.logic": 0.5787781350482315, "success_rate.epoch.env.math": 0.9763469119579501, "success_rate.epoch.env.sat": 0.09954751131221719, "success_rate.epoch.env.science": 0.9553008595988539, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985733658130702, "success_rate.epoch.global": 0.8512915129151292, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9904057017543859, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.991304347826087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008695652173913044 }, { "epoch": 1.566655585106383, "grad_norm": 28.635721832078602, "learning_rate": 1.6463732785263593e-07, "loss": 0.2779, "step": 9425, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4609375, "success_rate.epoch.env.logic": 0.5787781350482315, "success_rate.epoch.env.math": 0.9763469119579501, "success_rate.epoch.env.sat": 0.0990990990990991, "success_rate.epoch.env.science": 0.9553392499284283, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5989219626067638, "success_rate.epoch.global": 0.8512442396313364, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950657894736842, "tokens_p.mean_in_band": 0.65171875, "tokens_rate.above_band": 0.9673629242819843, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03263707571801567 }, { "epoch": 1.5674867021276597, "grad_norm": 50.34213786609767, "learning_rate": 1.6462028162181937e-07, "loss": 0.2623, "step": 9430, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4609375, "success_rate.epoch.env.logic": 0.5787781350482315, "success_rate.epoch.env.math": 0.9764089121887287, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.9553775743707094, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5988906838880492, "success_rate.epoch.global": 0.8512244522187442, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.991796875, "tokens_p.mean_in_band": 0.6741071428571429, "tokens_rate.above_band": 0.9195402298850575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08045977011494253 }, { "epoch": 1.5683178191489362, "grad_norm": 58.34430369348738, "learning_rate": 1.6460326268623505e-07, "loss": 0.1321, "step": 9435, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4573643410852713, "success_rate.epoch.env.logic": 0.5787781350482315, "success_rate.epoch.env.math": 0.9764397905759162, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.9554413024850043, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5985744518504813, "success_rate.epoch.global": 0.8512320706141964, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.99185667752443, "tokens_p.mean_in_band": 0.6206704125615764, "tokens_rate.above_band": 0.8193950177935944, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1806049822064057 }, { "epoch": 1.5691489361702127, "grad_norm": 188.27202069788194, "learning_rate": 1.6458627107608748e-07, "loss": 0.3187, "step": 9440, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45384615384615384, "success_rate.epoch.env.logic": 0.5787781350482315, "success_rate.epoch.env.math": 0.9764397905759162, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.9555175363558597, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5982615469988212, "success_rate.epoch.global": 0.8512396694214877, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9886904761904762, "tokens_p.mean_in_band": 0.6556640625, "tokens_rate.above_band": 0.9051724137931034, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09482758620689655 }, { "epoch": 1.5699800531914894, "grad_norm": 69.67043847586811, "learning_rate": 1.645693068215325e-07, "loss": 0.2209, "step": 9445, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45384615384615384, "success_rate.epoch.env.logic": 0.5801282051282052, "success_rate.epoch.env.math": 0.9764397905759162, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.9555555555555556, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5983877369333366, "success_rate.epoch.global": 0.8513761467889909, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959795321637427, "tokens_p.mean_in_band": 0.6770833333333334, "tokens_rate.above_band": 0.9661016949152542, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03389830508474576 }, { "epoch": 1.570811170212766, "grad_norm": 14.729485109307214, "learning_rate": 1.6455236995267761e-07, "loss": 0.2068, "step": 9450, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45384615384615384, "success_rate.epoch.env.logic": 0.5801282051282052, "success_rate.epoch.env.math": 0.9764397905759162, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.9556187766714083, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.598393484307505, "success_rate.epoch.global": 0.851512373968836, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959692716236722, "tokens_p.mean_in_band": 0.6458333333333334, "tokens_rate.above_band": 0.9734121122599705, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026587887740029542 }, { "epoch": 1.5716422872340425, "grad_norm": 57.47890689904436, "learning_rate": 1.6453546049958153e-07, "loss": 0.2622, "step": 9455, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45384615384615384, "success_rate.epoch.env.logic": 0.5808, "success_rate.epoch.env.math": 0.9764397905759162, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.9557069846678024, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5984625754773404, "success_rate.epoch.global": 0.8517298187808896, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9963422391857506, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.9949367088607595, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005063291139240506 }, { "epoch": 1.572473404255319, "grad_norm": 149.27959587221156, "learning_rate": 1.645185784922544e-07, "loss": 0.2552, "step": 9460, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45384615384615384, "success_rate.epoch.env.logic": 0.5808, "success_rate.epoch.env.math": 0.9764397905759162, "success_rate.epoch.env.sat": 0.09865470852017937, "success_rate.epoch.env.science": 0.9557572319909245, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.598467143415806, "success_rate.epoch.global": 0.8518383025425279, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958969465648855, "tokens_p.mean_in_band": 0.609375, "tokens_rate.above_band": 0.9675036927621861, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03249630723781388 }, { "epoch": 1.5733045212765957, "grad_norm": 54.73657587729029, "learning_rate": 1.645017239606577e-07, "loss": 0.5639, "step": 9465, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45384615384615384, "success_rate.epoch.env.logic": 0.5808, "success_rate.epoch.env.math": 0.9765013054830287, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9558073654390935, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5984372548293867, "success_rate.epoch.global": 0.8518450858604312, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.992231638418079, "tokens_p.mean_in_band": 0.6428571428571429, "tokens_rate.above_band": 0.9267015706806283, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07329842931937172 }, { "epoch": 1.5741356382978724, "grad_norm": 74.3583783496278, "learning_rate": 1.6448489693470409e-07, "loss": 0.2607, "step": 9470, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.4166666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45384615384615384, "success_rate.epoch.env.logic": 0.5808, "success_rate.epoch.env.math": 0.9765013054830287, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9558698727015559, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5984429373077924, "success_rate.epoch.global": 0.8519802883737908, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9988715277777778, "tokens_p.mean_in_band": 0.7268318965517241, "tokens_rate.above_band": 0.9612817089452603, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03871829105473965 }, { "epoch": 1.5749667553191489, "grad_norm": 88.98239818161186, "learning_rate": 1.6446809744425737e-07, "loss": 0.291, "step": 9475, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.46153846153846156, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4580152671755725, "success_rate.epoch.env.logic": 0.5798722044728435, "success_rate.epoch.env.math": 0.9765319426336375, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9559322033898305, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6028253082635143, "success_rate.epoch.global": 0.8520408163265306, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992451690821256, "tokens_p.mean_in_band": 0.5435697115384616, "tokens_rate.above_band": 0.9922109047333733, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007789095266626723 }, { "epoch": 1.5757978723404256, "grad_norm": 74.856323054572, "learning_rate": 1.6445132551913253e-07, "loss": 0.4029, "step": 9480, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.46153846153846156, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4580152671755725, "success_rate.epoch.env.logic": 0.5798722044728435, "success_rate.epoch.env.math": 0.9765319426336375, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9559819413092551, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6028298298925528, "success_rate.epoch.global": 0.8521485797523671, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969848732624693, "tokens_p.mean_in_band": 0.5092329545454546, "tokens_rate.above_band": 0.9652722967640095, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03472770323599053 }, { "epoch": 1.5766289893617023, "grad_norm": 95.13952732624814, "learning_rate": 1.6443458118909562e-07, "loss": 0.2159, "step": 9485, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.46153846153846156, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4580152671755725, "success_rate.epoch.env.logic": 0.580542264752791, "success_rate.epoch.env.math": 0.9765625, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9560439560439561, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6028991601090085, "success_rate.epoch.global": 0.8523367885070012, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9922855648535565, "tokens_p.mean_in_band": 0.83203125, "tokens_rate.above_band": 0.991701244813278, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008298755186721992 }, { "epoch": 1.5774601063829787, "grad_norm": 93.78763965216086, "learning_rate": 1.6441786448386378e-07, "loss": 0.1987, "step": 9490, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.46153846153846156, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4580152671755725, "success_rate.epoch.env.logic": 0.5812101910828026, "success_rate.epoch.env.math": 0.976592977893368, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9560934421615536, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6029671501400063, "success_rate.epoch.global": 0.8524977293369664, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936440677966102, "tokens_p.mean_in_band": 0.673828125, "tokens_rate.above_band": 0.9833333333333333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016666666666666666 }, { "epoch": 1.5782912234042552, "grad_norm": 84.33063695440337, "learning_rate": 1.6440117543310504e-07, "loss": 0.2803, "step": 9495, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5812101910828026, "success_rate.epoch.env.math": 0.976592977893368, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9561674627704412, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5996614387774369, "success_rate.epoch.global": 0.8523489932885906, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9918569627346596, "tokens_p.mean_in_band": 0.5301011029411765, "tokens_rate.above_band": 0.8332944197992062, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16670558020079385 }, { "epoch": 1.579122340425532, "grad_norm": 67.26672162826699, "learning_rate": 1.6438451406643834e-07, "loss": 0.246, "step": 9500, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5800316957210776, "success_rate.epoch.env.math": 0.9766233766233766, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9562166713443727, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5995615398630929, "success_rate.epoch.global": 0.8522006882811085, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983010708401977, "tokens_p.mean_in_band": 0.6546415441176471, "tokens_rate.above_band": 0.9727564102564102, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027243589743589744 }, { "epoch": 1.5799534574468086, "grad_norm": 1350.4006573297422, "learning_rate": 1.6436788041343357e-07, "loss": 0.3314, "step": 9505, "success_rate.epoch.env.abd": 0.575, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5800316957210776, "success_rate.epoch.env.math": 0.9767141009055628, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9562535053280987, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.5995731360690848, "success_rate.epoch.global": 0.852361136240275, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9972067039106145, "tokens_p.mean_in_band": 0.5234375, "tokens_rate.above_band": 0.9835164835164835, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016483516483516484 }, { "epoch": 1.580784574468085, "grad_norm": 148.16181873774264, "learning_rate": 1.6435127450361135e-07, "loss": 0.2241, "step": 9510, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5806962025316456, "success_rate.epoch.env.math": 0.9767741935483871, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9563025210084034, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6005858150501969, "success_rate.epoch.global": 0.8525745257452575, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953427189988624, "tokens_p.mean_in_band": 0.8008814102564102, "tokens_rate.above_band": 0.9575163398692811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.042483660130718956 }, { "epoch": 1.5816156914893615, "grad_norm": 46.40845712135668, "learning_rate": 1.6433469636644316e-07, "loss": 0.2595, "step": 9515, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5806962025316456, "success_rate.epoch.env.math": 0.9768339768339769, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9563758389261745, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6005979151595935, "success_rate.epoch.global": 0.8527872992964098, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9915497448979592, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.5824468085106385, "grad_norm": 94.97964929442708, "learning_rate": 1.6431814603135105e-07, "loss": 0.2735, "step": 9520, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45864661654135336, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5806962025316456, "success_rate.epoch.env.math": 0.9768637532133676, "success_rate.epoch.env.sat": 0.09821428571428571, "success_rate.epoch.env.science": 0.9564732142857143, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6009823073173052, "success_rate.epoch.global": 0.8530524041058887, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9960241730279898, "tokens_p.mean_in_band": 0.7786458333333334, "tokens_rate.above_band": 0.9924242424242424, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007575757575757576 }, { "epoch": 1.583277925531915, "grad_norm": 24.78217072807551, "learning_rate": 1.6430162352770787e-07, "loss": 0.2479, "step": 9525, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45864661654135336, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.5806962025316456, "success_rate.epoch.env.math": 0.9768934531450578, "success_rate.epoch.env.sat": 0.09777777777777778, "success_rate.epoch.env.science": 0.9565217391304348, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6009497361209327, "success_rate.epoch.global": 0.8530311207051628, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9860026041666666, "tokens_p.mean_in_band": 0.6493055555555556, "tokens_rate.above_band": 0.9142857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08571428571428572 }, { "epoch": 1.5841090425531914, "grad_norm": 47.859974559607, "learning_rate": 1.64285128884837e-07, "loss": 0.2566, "step": 9530, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45864661654135336, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45454545454545453, "success_rate.epoch.env.logic": 0.580441640378549, "success_rate.epoch.env.math": 0.9769230769230769, "success_rate.epoch.env.sat": 0.09777777777777778, "success_rate.epoch.env.science": 0.9565580618212197, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6009325892405425, "success_rate.epoch.global": 0.8530098831985624, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996728650137741, "tokens_p.mean_in_band": 0.5246394230769231, "tokens_rate.above_band": 0.9824086603518268, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017591339648173207 }, { "epoch": 1.584940159574468, "grad_norm": 75.39368286501771, "learning_rate": 1.642686621320124e-07, "loss": 0.2129, "step": 9535, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45864661654135336, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45864661654135336, "success_rate.epoch.env.logic": 0.580441640378549, "success_rate.epoch.env.math": 0.9769526248399488, "success_rate.epoch.env.sat": 0.10176991150442478, "success_rate.epoch.env.science": 0.9566184649610678, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6016765207659304, "success_rate.epoch.global": 0.8532208864166517, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7666666666666667, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9933505455962588, "tokens_p.mean_below_band": 3.1650415621697903e-10, "tokens_p.mean_in_band": 0.7725183823529411, "tokens_rate.above_band": 0.9734446130500759, "tokens_rate.below_band": 0.0007587253414264037, "tokens_rate.in_band": 0.025796661608497723 }, { "epoch": 1.5857712765957448, "grad_norm": 103.13253511809297, "learning_rate": 1.642522232984585e-07, "loss": 0.2818, "step": 9540, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45864661654135336, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45864661654135336, "success_rate.epoch.env.logic": 0.580441640378549, "success_rate.epoch.env.math": 0.9770408163265306, "success_rate.epoch.env.sat": 0.10176991150442478, "success_rate.epoch.env.science": 0.9563888888888888, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6016636676217851, "success_rate.epoch.global": 0.853225806451613, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974888392857143, "tokens_p.mean_in_band": 0.5558035714285714, "tokens_rate.above_band": 0.96, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04 }, { "epoch": 1.5866023936170213, "grad_norm": 47.3144813182269, "learning_rate": 1.6423581241335022e-07, "loss": 0.2696, "step": 9545, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45864661654135336, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45864661654135336, "success_rate.epoch.env.logic": 0.580441640378549, "success_rate.epoch.env.math": 0.9770700636942675, "success_rate.epoch.env.sat": 0.10176991150442478, "success_rate.epoch.env.science": 0.9564614531336662, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6016729232229228, "success_rate.epoch.global": 0.8534097010918203, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9987541528239202, "tokens_p.mean_in_band": 0.7045036764705882, "tokens_rate.above_band": 0.9860769860769861, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013923013923013924 }, { "epoch": 1.5874335106382977, "grad_norm": 50.65825202496181, "learning_rate": 1.6421942950581286e-07, "loss": 0.2596, "step": 9550, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45864661654135336, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4552238805970149, "success_rate.epoch.env.logic": 0.580441640378549, "success_rate.epoch.env.math": 0.9770700636942675, "success_rate.epoch.env.sat": 0.1013215859030837, "success_rate.epoch.env.science": 0.9562569213732004, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013024147405459, "success_rate.epoch.global": 0.853083109919571, "success_rate.window.env.ded": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9902823920265781, "tokens_p.mean_below_band": 1.9744038581848145e-07, "tokens_p.mean_in_band": 0.593987462006079, "tokens_rate.above_band": 0.8201634877384196, "tokens_rate.below_band": 0.0005449591280653951, "tokens_rate.in_band": 0.179291553133515 }, { "epoch": 1.5882646276595744, "grad_norm": 19.74064520363544, "learning_rate": 1.6420307460492206e-07, "loss": 0.257, "step": 9555, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4552238805970149, "success_rate.epoch.env.logic": 0.5811023622047244, "success_rate.epoch.env.math": 0.9770700636942675, "success_rate.epoch.env.sat": 0.1013215859030837, "success_rate.epoch.env.science": 0.9563053097345132, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6017341475414836, "success_rate.epoch.global": 0.8532404927691484, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974671412924425, "tokens_p.mean_in_band": 0.6536458333333334, "tokens_rate.above_band": 0.9902386117136659, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009761388286334056 }, { "epoch": 1.5890957446808511, "grad_norm": 27.400443794872835, "learning_rate": 1.6418674773970376e-07, "loss": 0.3753, "step": 9560, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4552238805970149, "success_rate.epoch.env.logic": 0.5811023622047244, "success_rate.epoch.env.math": 0.9770700636942675, "success_rate.epoch.env.sat": 0.1013215859030837, "success_rate.epoch.env.science": 0.956365644849489, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.601739632551936, "success_rate.epoch.global": 0.853371387798787, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9945886075949367, "tokens_p.mean_below_band": 3.3527612686157227e-07, "tokens_p.mean_in_band": 0.620859375, "tokens_rate.above_band": 0.9511196725258849, "tokens_rate.below_band": 0.0007223693715386468, "tokens_rate.in_band": 0.04815795810257645 }, { "epoch": 1.5899268617021276, "grad_norm": 35.20897838334221, "learning_rate": 1.6417044893913415e-07, "loss": 0.2764, "step": 9565, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4552238805970149, "success_rate.epoch.env.logic": 0.5817610062893082, "success_rate.epoch.env.math": 0.9770700636942675, "success_rate.epoch.env.sat": 0.10043668122270742, "success_rate.epoch.env.science": 0.9564137931034483, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6017234405208601, "success_rate.epoch.global": 0.8531979333689649, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9964102176541717, "tokens_p.mean_in_band": 0.5651483050847458, "tokens_rate.above_band": 0.9334085778781038, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06659142212189616 }, { "epoch": 1.5907579787234043, "grad_norm": 22.305761052885384, "learning_rate": 1.641541782321396e-07, "loss": 0.3602, "step": 9570, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4626865671641791, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4552238805970149, "success_rate.epoch.env.logic": 0.5824175824175825, "success_rate.epoch.env.math": 0.9770992366412213, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.9564498346196252, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6021446164713742, "success_rate.epoch.global": 0.8533546894465207, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951550387596899, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.979746835443038, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020253164556962026 }, { "epoch": 1.591589095744681, "grad_norm": 90.64966224663716, "learning_rate": 1.6413793564759667e-07, "loss": 0.1337, "step": 9575, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45925925925925926, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4552238805970149, "success_rate.epoch.env.logic": 0.5830721003134797, "success_rate.epoch.env.math": 0.9771573604060914, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.9564977973568282, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6019021888798333, "success_rate.epoch.global": 0.8533854629465079, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965452261306532, "tokens_p.mean_in_band": 0.599609375, "tokens_rate.above_band": 0.995, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005 }, { "epoch": 1.5924202127659575, "grad_norm": 67.55648899327868, "learning_rate": 1.641217212143319e-07, "loss": 0.2594, "step": 9580, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45925925925925926, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4552238805970149, "success_rate.epoch.env.logic": 0.5834633385335414, "success_rate.epoch.env.math": 0.9771863117870723, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.9565217391304348, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6019425644593469, "success_rate.epoch.global": 0.8533641043848749, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980733662145499, "tokens_p.mean_in_band": 0.7001953125, "tokens_rate.above_band": 0.9854191980558931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014580801944106925 }, { "epoch": 1.593251329787234, "grad_norm": 19.66745065199376, "learning_rate": 1.64105534961122e-07, "loss": 0.2281, "step": 9585, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45925925925925926, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4552238805970149, "success_rate.epoch.env.logic": 0.5841121495327103, "success_rate.epoch.env.math": 0.9771863117870723, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.9565695437053326, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6020058931478984, "success_rate.epoch.global": 0.8534941468605889, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9953218562874252, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9985052316890882, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0014947683109118087 }, { "epoch": 1.5940824468085106, "grad_norm": 58.34736606183343, "learning_rate": 1.6408937691669357e-07, "loss": 0.4847, "step": 9590, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45925925925925926, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45185185185185184, "success_rate.epoch.env.logic": 0.5822981366459627, "success_rate.epoch.env.math": 0.9771863117870723, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.9566291517979687, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.601539853735237, "success_rate.epoch.global": 0.853170386114063, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9953892314468179, "tokens_p.mean_in_band": 0.5213496767241379, "tokens_rate.above_band": 0.9072927072927073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09270729270729271 }, { "epoch": 1.5949135638297873, "grad_norm": 42.210974788508544, "learning_rate": 1.6407324710972318e-07, "loss": 0.3051, "step": 9595, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45925925925925926, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5829457364341085, "success_rate.epoch.env.math": 0.9772151898734177, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.9566648381788261, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013025559323464, "success_rate.epoch.global": 0.8531493276716207, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9907378580323786, "tokens_p.mean_below_band": 7.338821887969971e-07, "tokens_p.mean_in_band": 0.55625, "tokens_rate.above_band": 0.8021978021978022, "tokens_rate.below_band": 0.0004995004995004995, "tokens_rate.in_band": 0.1973026973026973 }, { "epoch": 1.5957446808510638, "grad_norm": 131.84954726109595, "learning_rate": 1.6405714556883728e-07, "loss": 0.2865, "step": 9600, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5826893353941267, "success_rate.epoch.env.math": 0.9772151898734177, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.9567004658810633, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.600975494145453, "success_rate.epoch.global": 0.8529515729939908, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987898633257403, "tokens_p.mean_in_band": 0.5281575520833334, "tokens_rate.above_band": 0.9733924611973392, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026607538802660754 }, { "epoch": 1.5965757978723403, "grad_norm": 165.81248437530223, "learning_rate": 1.6404107232261217e-07, "loss": 0.3138, "step": 9605, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5826893353941267, "success_rate.epoch.env.math": 0.9773013871374527, "success_rate.epoch.env.sat": 0.10434782608695652, "success_rate.epoch.env.science": 0.9567360350492881, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.600986563821113, "success_rate.epoch.global": 0.8531073446327684, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9931913407821229, "tokens_p.mean_in_band": 0.6983506944444444, "tokens_rate.above_band": 0.9086294416243654, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09137055837563451 }, { "epoch": 1.597406914893617, "grad_norm": 62.12407310166352, "learning_rate": 1.6402502739957396e-07, "loss": 0.4182, "step": 9610, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45588235294117646, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5817901234567902, "success_rate.epoch.env.math": 0.9761306532663316, "success_rate.epoch.env.sat": 0.1038961038961039, "success_rate.epoch.env.science": 0.9567833698030634, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6007616244351552, "success_rate.epoch.global": 0.8528115635466244, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9942064606741573, "tokens_p.mean_below_band": 6.565414878423326e-12, "tokens_p.mean_in_band": 0.66015625, "tokens_rate.above_band": 0.9595687331536388, "tokens_rate.below_band": 0.0013477088948787063, "tokens_rate.in_band": 0.03908355795148248 }, { "epoch": 1.5982380319148937, "grad_norm": 81.57105460252536, "learning_rate": 1.640090108281984e-07, "loss": 0.1507, "step": 9615, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5815384615384616, "success_rate.epoch.env.math": 0.9761606022584692, "success_rate.epoch.env.sat": 0.1038961038961039, "success_rate.epoch.env.science": 0.9568188029516261, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6011057500605858, "success_rate.epoch.global": 0.8528169014084507, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970580543933054, "tokens_p.mean_in_band": 0.6941105769230769, "tokens_rate.above_band": 0.9865841073271414, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013415892672858616 }, { "epoch": 1.5990691489361701, "grad_norm": 77.24029236531815, "learning_rate": 1.6399302263691102e-07, "loss": 0.1527, "step": 9620, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5815384615384616, "success_rate.epoch.env.math": 0.9761904761904762, "success_rate.epoch.env.sat": 0.1038961038961039, "success_rate.epoch.env.science": 0.9566284779050737, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6010911635956271, "success_rate.epoch.global": 0.8528481012658228, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9913522012578616, "tokens_p.mean_below_band": 4.05634636990726e-10, "tokens_p.mean_in_band": 0.753125, "tokens_rate.above_band": 0.9636363636363636, "tokens_rate.below_band": 0.006060606060606061, "tokens_rate.in_band": 0.030303030303030304 }, { "epoch": 1.5999002659574468, "grad_norm": 84.89153635794803, "learning_rate": 1.639770628540869e-07, "loss": 0.3731, "step": 9625, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5815384615384616, "success_rate.epoch.env.math": 0.975, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9566639411283728, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.600945450777335, "success_rate.epoch.global": 0.8526519142957499, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5555555555555555, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9918686224489796, "tokens_p.mean_below_band": 4.6629367034256575e-15, "tokens_p.mean_in_band": 0.7026909722222222, "tokens_rate.above_band": 0.9116279069767442, "tokens_rate.below_band": 0.004651162790697674, "tokens_rate.in_band": 0.08372093023255814 }, { "epoch": 1.6007313829787235, "grad_norm": 58.67035964422717, "learning_rate": 1.639611315080508e-07, "loss": 0.2224, "step": 9630, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5815384615384616, "success_rate.epoch.env.math": 0.9750312109862672, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9567111353117342, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6009525785200286, "success_rate.epoch.global": 0.8527811896824005, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99475, "tokens_p.mean_in_band": 0.8645833333333334, "tokens_rate.above_band": 0.9765625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0234375 }, { "epoch": 1.6015625, "grad_norm": 67.140955328308, "learning_rate": 1.6394522862707692e-07, "loss": 0.2804, "step": 9635, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5815384615384616, "success_rate.epoch.env.math": 0.9750623441396509, "success_rate.epoch.env.sat": 0.10300429184549356, "success_rate.epoch.env.science": 0.9567582268153386, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.600919327669157, "success_rate.epoch.global": 0.852760736196319, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9902146464646465, "tokens_p.mean_in_band": 0.67236328125, "tokens_rate.above_band": 0.9252336448598131, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07476635514018691 }, { "epoch": 1.6023936170212765, "grad_norm": 51.59720938595861, "learning_rate": 1.6392935423938898e-07, "loss": 0.259, "step": 9640, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5815384615384616, "success_rate.epoch.env.math": 0.975093399750934, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.9568052159739201, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6008864054408364, "success_rate.epoch.global": 0.8527403256872702, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9911556603773585, "tokens_p.mean_in_band": 0.7270220588235294, "tokens_rate.above_band": 0.925764192139738, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07423580786026202 }, { "epoch": 1.6032247340425532, "grad_norm": 70.36176220100445, "learning_rate": 1.6391350837316014e-07, "loss": 0.3587, "step": 9645, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5815384615384616, "success_rate.epoch.env.math": 0.975093399750934, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.9566395663956639, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6008713463882677, "success_rate.epoch.global": 0.8527972027972028, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9901785714285715, "tokens_p.mean_in_band": 0.496875, "tokens_rate.above_band": 0.9767441860465116, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023255813953488372 }, { "epoch": 1.6040558510638299, "grad_norm": 15.792702063112475, "learning_rate": 1.6389769105651295e-07, "loss": 0.3159, "step": 9650, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4485294117647059, "success_rate.epoch.env.logic": 0.5806451612903226, "success_rate.epoch.env.math": 0.9751243781094527, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.9566982408660352, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6007982875319723, "success_rate.epoch.global": 0.8528025144054479, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9987927191679049, "tokens_p.mean_in_band": 0.5813337053571429, "tokens_rate.above_band": 0.9600570613409415, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039942938659058486 }, { "epoch": 1.6048869680851063, "grad_norm": 40.85080914535933, "learning_rate": 1.6388190231751922e-07, "loss": 0.3596, "step": 9655, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5806451612903226, "success_rate.epoch.env.math": 0.9751243781094527, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.9567567567567568, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6011695465002962, "success_rate.epoch.global": 0.8529565672422815, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956664098613252, "tokens_p.mean_in_band": 0.708984375, "tokens_rate.above_band": 0.9938744257274119, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006125574272588055 }, { "epoch": 1.605718085106383, "grad_norm": 66.68457964214342, "learning_rate": 1.6386614218420013e-07, "loss": 0.3417, "step": 9660, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45985401459854014, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5806451612903226, "success_rate.epoch.env.math": 0.9751243781094527, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.9568151147098516, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6011748517687594, "success_rate.epoch.global": 0.8530846985012199, "success_rate.window.env.ded": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998242030538441, "tokens_p.mean_below_band": 3.11434268951416e-07, "tokens_p.mean_in_band": 0.49734669811320753, "tokens_rate.above_band": 0.9579163459071081, "tokens_rate.below_band": 0.0012830382345393892, "tokens_rate.in_band": 0.04080061585835258 }, { "epoch": 1.6065492021276597, "grad_norm": 84.92666634787885, "learning_rate": 1.6385041068452603e-07, "loss": 0.2166, "step": 9665, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5812883435582822, "success_rate.epoch.env.math": 0.9751243781094527, "success_rate.epoch.env.sat": 0.10212765957446808, "success_rate.epoch.env.science": 0.9568384138117075, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6015515917436385, "success_rate.epoch.global": 0.8530640668523677, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9943595342066958, "tokens_p.mean_in_band": 0.6800054505813954, "tokens_rate.above_band": 0.9410958904109589, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0589041095890411 }, { "epoch": 1.6073803191489362, "grad_norm": 75.16921152654156, "learning_rate": 1.638347078464165e-07, "loss": 0.2992, "step": 9670, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5825688073394495, "success_rate.epoch.env.math": 0.9751861042183623, "success_rate.epoch.env.sat": 0.1016949152542373, "success_rate.epoch.env.science": 0.9568733153638814, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6016374414820039, "success_rate.epoch.global": 0.8530945757997218, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936224489795918, "tokens_p.mean_in_band": 0.5872395833333334, "tokens_rate.above_band": 0.9560975609756097, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04390243902439024 }, { "epoch": 1.6082114361702127, "grad_norm": 19.544656165755438, "learning_rate": 1.6381903369774025e-07, "loss": 0.3568, "step": 9675, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5825688073394495, "success_rate.epoch.env.math": 0.9753086419753086, "success_rate.epoch.env.sat": 0.1016949152542373, "success_rate.epoch.env.science": 0.956908160517102, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6016517490192919, "success_rate.epoch.global": 0.8532731376975169, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972198460222412, "tokens_p.mean_in_band": 0.5680338541666666, "tokens_rate.above_band": 0.9605587510271159, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03944124897288414 }, { "epoch": 1.6090425531914894, "grad_norm": 150.8497379267538, "learning_rate": 1.63803388266315e-07, "loss": 0.2288, "step": 9680, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5825688073394495, "success_rate.epoch.env.math": 0.9753086419753086, "success_rate.epoch.env.sat": 0.1016949152542373, "success_rate.epoch.env.science": 0.9569661108122647, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.601657017227943, "success_rate.epoch.global": 0.8534004163775156, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9941852168949772, "tokens_p.mean_in_band": 0.5137434554973822, "tokens_rate.above_band": 0.8209934395501406, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1790065604498594 }, { "epoch": 1.609873670212766, "grad_norm": 68.53703273612386, "learning_rate": 1.637877715799076e-07, "loss": 0.252, "step": 9685, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5807926829268293, "success_rate.epoch.env.math": 0.9753086419753086, "success_rate.epoch.env.sat": 0.10126582278481013, "success_rate.epoch.env.science": 0.9570008062348832, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6014596970952676, "success_rate.epoch.global": 0.8530329289428076, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973727876106194, "tokens_p.mean_in_band": 0.6693412162162162, "tokens_rate.above_band": 0.9734386216798278, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02656137832017229 }, { "epoch": 1.6107047872340425, "grad_norm": 283.55342851032486, "learning_rate": 1.637721836662338e-07, "loss": 0.1946, "step": 9690, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5807926829268293, "success_rate.epoch.env.math": 0.9753086419753086, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.9570469798657718, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6014252140934978, "success_rate.epoch.global": 0.852987012987013, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.000475888324873, "tokens_p.mean_in_band": 0.6707175925925926, "tokens_rate.above_band": 0.9668711656441717, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033128834355828224 }, { "epoch": 1.611535904255319, "grad_norm": 136.53384687625044, "learning_rate": 1.637566245529584e-07, "loss": 0.2541, "step": 9695, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5807926829268293, "success_rate.epoch.env.math": 0.9753390875462392, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.9571160546770303, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6014342614009696, "success_rate.epoch.global": 0.8531649948114839, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9895174050632911, "tokens_p.mean_in_band": 0.7973090277777778, "tokens_rate.above_band": 0.9461077844311377, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05389221556886228 }, { "epoch": 1.6123670212765957, "grad_norm": 63.74202025737574, "learning_rate": 1.6374109426769495e-07, "loss": 0.2923, "step": 9700, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5799086757990868, "success_rate.epoch.env.math": 0.9753694581280788, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.9571734475374732, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013618756113822, "success_rate.epoch.global": 0.853169804802211, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9959112149532711, "tokens_p.mean_in_band": 0.56875, "tokens_rate.above_band": 0.9553571428571429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044642857142857144 }, { "epoch": 1.6131981382978724, "grad_norm": 21.803538223183345, "learning_rate": 1.6372559283800593e-07, "loss": 0.2534, "step": 9705, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5799086757990868, "success_rate.epoch.env.math": 0.9753694581280788, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.9572421165152325, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013681182457241, "success_rate.epoch.global": 0.8533218291630716, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917218543046358, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.6140292553191489, "grad_norm": 166.94935595492123, "learning_rate": 1.637101202914026e-07, "loss": 0.2579, "step": 9710, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5796661608497724, "success_rate.epoch.env.math": 0.97539975399754, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.9572763684913218, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013519394181092, "success_rate.epoch.global": 0.8533011549732805, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960115708274895, "tokens_p.mean_in_band": 0.6730769230769231, "tokens_rate.above_band": 0.9820936639118457, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01790633608815427 }, { "epoch": 1.6148603723404256, "grad_norm": 48.72009810112761, "learning_rate": 1.6369467665534487e-07, "loss": 0.2465, "step": 9715, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5796661608497724, "success_rate.epoch.env.math": 0.9754299754299754, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.9573105656350054, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013577956523017, "success_rate.epoch.global": 0.853402239448751, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9930218446601942, "tokens_p.mean_in_band": 0.7799479166666666, "tokens_rate.above_band": 0.9927710843373494, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007228915662650603 }, { "epoch": 1.6156914893617023, "grad_norm": 6.142620683199756, "learning_rate": 1.6367926195724148e-07, "loss": 0.2648, "step": 9720, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45255474452554745, "success_rate.epoch.env.logic": 0.5796661608497724, "success_rate.epoch.env.math": 0.9754299754299754, "success_rate.epoch.env.sat": 0.10084033613445378, "success_rate.epoch.env.science": 0.9573674393818279, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013629659929219, "success_rate.epoch.global": 0.8535283993115318, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967851739788199, "tokens_p.mean_in_band": 0.7369791666666666, "tokens_rate.above_band": 0.9821693907875185, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017830609212481426 }, { "epoch": 1.6165226063829787, "grad_norm": 57.42820166340494, "learning_rate": 1.636638762244497e-07, "loss": 0.2926, "step": 9725, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5796661608497724, "success_rate.epoch.env.math": 0.9754299754299754, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.9571466595688048, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6016651740564902, "success_rate.epoch.global": 0.8533356258596974, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.5714285714285714, "success_rate.window.env_macro_mean": 0.6428571428571428, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9927536231884058, "tokens_p.mean_below_band": 2.8405338525772095e-08, "tokens_p.mean_in_band": 0.6987847222222222, "tokens_rate.above_band": 0.917960088691796, "tokens_rate.below_band": 0.0022172949002217295, "tokens_rate.in_band": 0.07982261640798226 }, { "epoch": 1.6173537234042552, "grad_norm": 87.56502326847537, "learning_rate": 1.6364851948427548e-07, "loss": 0.277, "step": 9730, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5779122541603631, "success_rate.epoch.env.math": 0.9754601226993865, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.9571922361074182, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6015126119763644, "success_rate.epoch.global": 0.8531684698608965, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973125689084895, "tokens_p.mean_in_band": 0.6411290322580645, "tokens_rate.above_band": 0.951232302045097, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04876769795490299 }, { "epoch": 1.618184840425532, "grad_norm": 79.98421020975385, "learning_rate": 1.636331917639732e-07, "loss": 0.2929, "step": 9735, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5770392749244713, "success_rate.epoch.env.math": 0.9754901960784313, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.9572263549415515, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6014390858833903, "success_rate.epoch.global": 0.8531228551818806, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0008439237490072, "tokens_p.mean_in_band": 0.5959821428571429, "tokens_rate.above_band": 0.96771714066103, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03228285933897002 }, { "epoch": 1.6190159574468086, "grad_norm": 136.0238770530322, "learning_rate": 1.6361789309074588e-07, "loss": 0.256, "step": 9740, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5776772247360482, "success_rate.epoch.env.math": 0.9755501222493888, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.957271762208068, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6015066570878496, "success_rate.epoch.global": 0.8532990574121679, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958095577746077, "tokens_p.mean_in_band": 0.6984375, "tokens_rate.above_band": 0.979050279329609, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02094972067039106 }, { "epoch": 1.619847074468085, "grad_norm": 48.87164770039113, "learning_rate": 1.6360262349174487e-07, "loss": 0.2973, "step": 9745, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5776772247360482, "success_rate.epoch.env.math": 0.975609756097561, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.9573057544417927, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6015151685498402, "success_rate.epoch.global": 0.8534246575342466, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9907962328767124, "tokens_p.mean_below_band": 4.6798959374427795e-08, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9864864864864865, "tokens_rate.below_band": 0.006756756756756757, "tokens_rate.in_band": 0.006756756756756757 }, { "epoch": 1.6206781914893615, "grad_norm": 87.45172741279457, "learning_rate": 1.6358738299407004e-07, "loss": 0.175, "step": 9750, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5776772247360482, "success_rate.epoch.env.math": 0.9756394640682094, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.9573509933774834, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6015219819049619, "success_rate.epoch.global": 0.8535500427715996, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9948559670781894, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9918367346938776, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00816326530612245 }, { "epoch": 1.6215093085106385, "grad_norm": 542.6965028363807, "learning_rate": 1.6357217162476943e-07, "loss": 0.3542, "step": 9755, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5776772247360482, "success_rate.epoch.env.math": 0.9756394640682094, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.957154192012695, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6015040908717993, "success_rate.epoch.global": 0.8535293112288498, "success_rate.window.env.science": 0.9, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.98725, "tokens_p.mean_below_band": 1.2514647096395493e-09, "tokens_p.mean_in_band": 0.892578125, "tokens_rate.above_band": 0.9765625, "tokens_rate.below_band": 0.0078125, "tokens_rate.in_band": 0.015625 }, { "epoch": 1.622340425531915, "grad_norm": 107.83038425029258, "learning_rate": 1.6355698941083954e-07, "loss": 0.2323, "step": 9760, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.463768115942029, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5776772247360482, "success_rate.epoch.env.math": 0.9756986634264885, "success_rate.epoch.env.sat": 0.100418410041841, "success_rate.epoch.env.science": 0.957176843774782, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6015115318827416, "success_rate.epoch.global": 0.8536293766011955, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951467803030303, "tokens_p.mean_in_band": 0.679931640625, "tokens_rate.above_band": 0.9428571428571428, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05714285714285714 }, { "epoch": 1.6231715425531914, "grad_norm": 170.28793044328873, "learning_rate": 1.6354183637922513e-07, "loss": 0.2531, "step": 9765, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5768072289156626, "success_rate.epoch.env.math": 0.9757281553398058, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9572107765451664, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6010968552968968, "success_rate.epoch.global": 0.8532923916751962, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4666666666666666, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9978021978021978, "tokens_p.mean_in_band": 0.6483373397435898, "tokens_rate.above_band": 0.9722222222222222, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027777777777777776 }, { "epoch": 1.624002659574468, "grad_norm": 64.14189895342564, "learning_rate": 1.6352671255681907e-07, "loss": 0.3442, "step": 9770, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5768072289156626, "success_rate.epoch.env.math": 0.9757281553398058, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9572784810126582, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.601103010248487, "success_rate.epoch.global": 0.8534423994546694, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.987127253814147, "tokens_p.mean_in_band": 0.6112328506097561, "tokens_rate.above_band": 0.8146892655367232, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.18531073446327684 }, { "epoch": 1.6248337765957448, "grad_norm": 92.81192114351522, "learning_rate": 1.6351161797046246e-07, "loss": 0.3274, "step": 9775, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5774436090225564, "success_rate.epoch.env.math": 0.9757575757575757, "success_rate.epoch.env.sat": 0.0995850622406639, "success_rate.epoch.env.science": 0.957334737951014, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6011309302215491, "success_rate.epoch.global": 0.8534717494894486, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950657894736842, "tokens_p.mean_in_band": 0.50146484375, "tokens_rate.above_band": 0.9344262295081968, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06557377049180328 }, { "epoch": 1.6256648936170213, "grad_norm": 79.38673202262062, "learning_rate": 1.6349655264694446e-07, "loss": 0.352, "step": 9780, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5774436090225564, "success_rate.epoch.env.math": 0.9757575757575757, "success_rate.epoch.env.sat": 0.09917355371900827, "success_rate.epoch.env.science": 0.9573908469226723, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6010986211715493, "success_rate.epoch.global": 0.8534512070724244, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930555555555556, "tokens_p.mean_in_band": 0.609619140625, "tokens_rate.above_band": 0.9183673469387755, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08163265306122448 }, { "epoch": 1.6264960106382977, "grad_norm": 50.10934825445179, "learning_rate": 1.6348151661300235e-07, "loss": 0.2157, "step": 9785, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5774436090225564, "success_rate.epoch.env.math": 0.9757869249394673, "success_rate.epoch.env.sat": 0.102880658436214, "success_rate.epoch.env.science": 0.9574132492113565, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6014403353704385, "success_rate.epoch.global": 0.853550798504927, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9871987951807228, "tokens_p.mean_in_band": 0.6979166666666666, "tokens_rate.above_band": 0.9651162790697675, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03488372093023256 }, { "epoch": 1.6273271276595744, "grad_norm": 92.08101166147156, "learning_rate": 1.6346650989532143e-07, "loss": 0.2566, "step": 9790, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5774436090225564, "success_rate.epoch.env.math": 0.9758162031438936, "success_rate.epoch.env.sat": 0.102880658436214, "success_rate.epoch.env.science": 0.9574579831932774, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6014470637510155, "success_rate.epoch.global": 0.8536750976065184, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9945958646616542, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9925373134328358, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007462686567164179 }, { "epoch": 1.6281582446808511, "grad_norm": 57.95508685092905, "learning_rate": 1.634515325205349e-07, "loss": 0.2988, "step": 9795, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.45652173913043476, "success_rate.epoch.env.logic": 0.5774436090225564, "success_rate.epoch.env.math": 0.9758162031438936, "success_rate.epoch.env.sat": 0.102880658436214, "success_rate.epoch.env.science": 0.9572851153039832, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6014313484883524, "success_rate.epoch.global": 0.8537040176301068, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945820433436533, "tokens_p.mean_below_band": 9.74978320300579e-10, "tokens_p.mean_in_band": 0.6742621527777778, "tokens_rate.above_band": 0.9714285714285714, "tokens_rate.below_band": 0.0015037593984962407, "tokens_rate.in_band": 0.02706766917293233 }, { "epoch": 1.6289893617021276, "grad_norm": 149.77229730209604, "learning_rate": 1.6343658451522397e-07, "loss": 0.2248, "step": 9800, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.5774436090225564, "success_rate.epoch.env.math": 0.9759326113116726, "success_rate.epoch.env.sat": 0.102880658436214, "success_rate.epoch.env.science": 0.9573074908328968, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.601799412055854, "success_rate.epoch.global": 0.8538774128005419, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.984059633027523, "tokens_p.mean_in_band": 0.8650841346153846, "tokens_rate.above_band": 0.9767025089605734, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023297491039426525 }, { "epoch": 1.6298204787234043, "grad_norm": 21.134333729103908, "learning_rate": 1.6342166590591767e-07, "loss": 0.308, "step": 9805, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.5765765765765766, "success_rate.epoch.env.math": 0.9759326113116726, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9573521716378859, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6016863217209665, "success_rate.epoch.global": 0.8536874154262517, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.993566833452466, "tokens_p.mean_in_band": 0.719640899122807, "tokens_rate.above_band": 0.9608516483516484, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03914835164835165 }, { "epoch": 1.630651595744681, "grad_norm": 70.64305279280978, "learning_rate": 1.634067767190929e-07, "loss": 0.4339, "step": 9810, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.460431654676259, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.5765765765765766, "success_rate.epoch.env.math": 0.9759615384615384, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9574078912986673, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6016940168855709, "success_rate.epoch.global": 0.8538357553227441, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921875, "tokens_p.mean_in_band": 0.6150568181818182, "tokens_rate.above_band": 0.907563025210084, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09243697478991597 }, { "epoch": 1.6314827127659575, "grad_norm": 31.51493105180416, "learning_rate": 1.6339191698117424e-07, "loss": 0.3018, "step": 9815, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45714285714285713, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.5757121439280359, "success_rate.epoch.env.math": 0.9759903961584634, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9571577847439916, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6012963369728714, "success_rate.epoch.global": 0.8534278959810875, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9994365743721829, "tokens_p.mean_below_band": 9.918585419654846e-08, "tokens_p.mean_in_band": 0.54364013671875, "tokens_rate.above_band": 0.9791929382093316, "tokens_rate.below_band": 0.0006305170239596469, "tokens_rate.in_band": 0.0201765447667087 }, { "epoch": 1.632313829787234, "grad_norm": 157.68248924518582, "learning_rate": 1.6337708671853417e-07, "loss": 0.3918, "step": 9820, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45714285714285713, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.460431654676259, "success_rate.epoch.env.logic": 0.5757121439280359, "success_rate.epoch.env.math": 0.9760191846522782, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9572248304642671, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6013050491741524, "success_rate.epoch.global": 0.8536009445100354, "success_rate.window.env.abd": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9854484732824428, "tokens_p.mean_below_band": 4.544854164123535e-07, "tokens_p.mean_in_band": 0.15770862800565771, "tokens_rate.above_band": 0.2698249227600412, "tokens_rate.below_band": 0.0020597322348094747, "tokens_rate.in_band": 0.7281153450051493 }, { "epoch": 1.6331449468085106, "grad_norm": 345.0089355512192, "learning_rate": 1.6336228595749277e-07, "loss": 0.2504, "step": 9825, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45714285714285713, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.5769805680119582, "success_rate.epoch.env.math": 0.9760765550239234, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9572582746937712, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6017789853827458, "success_rate.epoch.global": 0.8537982145864915, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9979580965909091, "tokens_p.mean_in_band": 0.49609375, "tokens_rate.above_band": 0.9985815602836879, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0014184397163120568 }, { "epoch": 1.6339760638297873, "grad_norm": 25.660032222857822, "learning_rate": 1.6334751472431772e-07, "loss": 0.1606, "step": 9830, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45390070921985815, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.5769805680119582, "success_rate.epoch.env.math": 0.97610513739546, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9572916666666667, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6014898786937851, "success_rate.epoch.global": 0.8537529451363177, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994299674267101, "tokens_p.mean_in_band": 0.6787109375, "tokens_rate.above_band": 0.9935275080906149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006472491909385114 }, { "epoch": 1.6348071808510638, "grad_norm": 81.76529913060513, "learning_rate": 1.6333277304522438e-07, "loss": 0.1528, "step": 9835, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.45390070921985815, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.5761194029850746, "success_rate.epoch.env.math": 0.9761336515513126, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9573361082206036, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.601418223301322, "success_rate.epoch.global": 0.8537323470073974, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9998337765957447, "tokens_p.mean_in_band": 0.6682942708333334, "tokens_rate.above_band": 0.9842931937172775, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015706806282722512 }, { "epoch": 1.6356382978723403, "grad_norm": 48.12581691359298, "learning_rate": 1.6331806094637558e-07, "loss": 0.2638, "step": 9840, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4507042253521127, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.5761194029850746, "success_rate.epoch.env.math": 0.9761336515513126, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9574025974025974, "success_rate.epoch.env.webshop": 0.5714285714285714, "success_rate.epoch.env_macro_mean": 0.6011336783298901, "success_rate.epoch.global": 0.8537363560033585, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977481036662452, "tokens_p.mean_in_band": 0.7026186342592593, "tokens_rate.above_band": 0.9669926650366748, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03300733496332518 }, { "epoch": 1.636469414893617, "grad_norm": 71.52089573257668, "learning_rate": 1.633033784538817e-07, "loss": 0.2687, "step": 9845, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.5761194029850746, "success_rate.epoch.env.math": 0.9761336515513126, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9574468085106383, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5943576657735087, "success_rate.epoch.global": 0.8535480624056366, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9943449575871819, "tokens_p.mean_in_band": 0.4790719696969697, "tokens_rate.above_band": 0.865415986949429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13458401305057097 }, { "epoch": 1.6373005319148937, "grad_norm": 68.71381725162301, "learning_rate": 1.6328872559380057e-07, "loss": 0.2793, "step": 9850, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4642857142857143, "success_rate.epoch.env.logic": 0.5767511177347243, "success_rate.epoch.env.math": 0.9761620977353993, "success_rate.epoch.env.sat": 0.10245901639344263, "success_rate.epoch.env.science": 0.9574688796680498, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.594419686872704, "success_rate.epoch.global": 0.8536462699077955, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969671741198858, "tokens_p.mean_in_band": 0.619140625, "tokens_rate.above_band": 0.9831618334892422, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01683816651075772 }, { "epoch": 1.6381316489361701, "grad_norm": 640.2945316924704, "learning_rate": 1.6327410239213746e-07, "loss": 0.3087, "step": 9855, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.5767511177347243, "success_rate.epoch.env.math": 0.9761904761904762, "success_rate.epoch.env.sat": 0.1016260162601626, "success_rate.epoch.env.science": 0.9574909279419388, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5940491977919211, "success_rate.epoch.global": 0.8532909060458884, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.45833333333333337, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9934192764578834, "tokens_p.mean_below_band": 1.5366822481155396e-07, "tokens_p.mean_in_band": 0.5515983166189111, "tokens_rate.above_band": 0.9134401972872996, "tokens_rate.below_band": 0.0004932182490752158, "tokens_rate.in_band": 0.08606658446362515 }, { "epoch": 1.6389627659574468, "grad_norm": 26.582517328401593, "learning_rate": 1.6325950887484494e-07, "loss": 0.236, "step": 9860, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.5767511177347243, "success_rate.epoch.env.math": 0.976218787158145, "success_rate.epoch.env.sat": 0.1016260162601626, "success_rate.epoch.env.science": 0.957545948744499, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5940567734073965, "success_rate.epoch.global": 0.8534381796888071, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923052763819096, "tokens_p.mean_in_band": 0.8203125, "tokens_rate.above_band": 0.9802955665024631, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019704433497536946 }, { "epoch": 1.6397938829787235, "grad_norm": 119.06521143810635, "learning_rate": 1.6324494506782295e-07, "loss": 0.4888, "step": 9865, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.5765230312035661, "success_rate.epoch.env.math": 0.9762470308788599, "success_rate.epoch.env.sat": 0.10121457489878542, "success_rate.epoch.env.science": 0.9575898629428498, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5940051943188993, "success_rate.epoch.global": 0.8532999164578112, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9953225806451613, "tokens_p.mean_in_band": 0.6643518518518519, "tokens_rate.above_band": 0.9663341645885287, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03366583541147132 }, { "epoch": 1.640625, "grad_norm": 57.04424599716554, "learning_rate": 1.6323041099691862e-07, "loss": 0.2523, "step": 9870, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.5771513353115727, "success_rate.epoch.env.math": 0.976303317535545, "success_rate.epoch.env.sat": 0.10080645161290322, "success_rate.epoch.env.science": 0.9576227390180878, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5940333164601763, "success_rate.epoch.global": 0.8533044058744993, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9898574561403509, "tokens_p.mean_in_band": 0.6166666666666667, "tokens_rate.above_band": 0.9382716049382716, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06172839506172839 }, { "epoch": 1.6414561170212765, "grad_norm": 818.1965300739258, "learning_rate": 1.6321590668792644e-07, "loss": 0.1898, "step": 9875, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.5771513353115727, "success_rate.epoch.env.math": 0.9763593380614657, "success_rate.epoch.env.sat": 0.10080645161290322, "success_rate.epoch.env.science": 0.9576555641621483, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5940413933392654, "success_rate.epoch.global": 0.8534267133566783, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.986451048951049, "tokens_p.mean_in_band": 0.8716517857142857, "tokens_rate.above_band": 0.9533333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04666666666666667 }, { "epoch": 1.6422872340425532, "grad_norm": 61.02273438130012, "learning_rate": 1.6320143216658794e-07, "loss": 0.4092, "step": 9880, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46099290780141844, "success_rate.epoch.env.logic": 0.5762962962962963, "success_rate.epoch.env.math": 0.9763872491145218, "success_rate.epoch.env.sat": 0.10080645161290322, "success_rate.epoch.env.science": 0.9576992519989683, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5939701715096837, "success_rate.epoch.global": 0.8534066300183242, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9986432706222865, "tokens_p.mean_below_band": 2.1316282072803006e-12, "tokens_p.mean_in_band": 0.5434283088235294, "tokens_rate.above_band": 0.9517906336088154, "tokens_rate.below_band": 0.0013774104683195593, "tokens_rate.in_band": 0.046831955922865015 }, { "epoch": 1.6431183510638299, "grad_norm": 390.3387918215039, "learning_rate": 1.6318698745859192e-07, "loss": 0.4407, "step": 9885, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4647887323943662, "success_rate.epoch.env.logic": 0.5762962962962963, "success_rate.epoch.env.math": 0.9763872491145218, "success_rate.epoch.env.sat": 0.10040160642570281, "success_rate.epoch.env.science": 0.9575070821529745, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5942609723787521, "success_rate.epoch.global": 0.8532690068208285, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7142857142857143, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9879313509544787, "tokens_p.mean_in_band": 0.79547119140625, "tokens_rate.above_band": 0.9140939597315436, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08590604026845637 }, { "epoch": 1.6439494680851063, "grad_norm": 142.5144629987705, "learning_rate": 1.6317257258957414e-07, "loss": 0.264, "step": 9890, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4647887323943662, "success_rate.epoch.env.logic": 0.5769230769230769, "success_rate.epoch.env.math": 0.9764150943396226, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9575726407816919, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5942899340201063, "success_rate.epoch.global": 0.8533222591362126, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9886592741935484, "tokens_p.mean_in_band": 0.7634765625, "tokens_rate.above_band": 0.9253731343283582, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07462686567164178 }, { "epoch": 1.644780585106383, "grad_norm": 702.9882753113421, "learning_rate": 1.6315818758511747e-07, "loss": 0.3563, "step": 9895, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4647887323943662, "success_rate.epoch.env.logic": 0.5775480059084195, "success_rate.epoch.env.math": 0.9764150943396226, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9575944487278335, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5943487282866049, "success_rate.epoch.global": 0.8533953179478665, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916974169741697, "tokens_p.mean_below_band": 1.2514647096395493e-09, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9890510948905109, "tokens_rate.below_band": 0.0036496350364963502, "tokens_rate.in_band": 0.0072992700729927005 }, { "epoch": 1.6456117021276597, "grad_norm": 115.40388935494721, "learning_rate": 1.6314383247075182e-07, "loss": 0.268, "step": 9900, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4647887323943662, "success_rate.epoch.env.logic": 0.5775480059084195, "success_rate.epoch.env.math": 0.9764705882352941, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9576488706365504, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5943587206324584, "success_rate.epoch.global": 0.8535655058043118, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954128440366973, "tokens_p.mean_in_band": 0.40625, "tokens_rate.above_band": 0.9969512195121951, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003048780487804878 }, { "epoch": 1.6464428191489362, "grad_norm": 46.18645799126082, "learning_rate": 1.6312950727195392e-07, "loss": 0.2314, "step": 9905, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.5781710914454278, "success_rate.epoch.env.math": 0.9765533411488863, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9576814567837907, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5947660988810897, "success_rate.epoch.global": 0.8537595230208679, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9999180865006553, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.6472739361702127, "grad_norm": 86.79595239960364, "learning_rate": 1.6311521201414756e-07, "loss": 0.2695, "step": 9910, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.44755244755244755, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.5781710914454278, "success_rate.epoch.env.math": 0.9766081871345029, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.9577356557377049, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5947397932960923, "success_rate.epoch.global": 0.8537876281839233, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984046546546547, "tokens_p.mean_in_band": 0.5883413461538461, "tokens_rate.above_band": 0.9624277456647399, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03757225433526012 }, { "epoch": 1.6481050531914894, "grad_norm": 51.938943423946014, "learning_rate": 1.6310094672270324e-07, "loss": 0.3952, "step": 9915, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.5781710914454278, "success_rate.epoch.env.math": 0.9766081871345029, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.9577789150460594, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5944611802233969, "success_rate.epoch.global": 0.8537431829449678, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9992455527318933, "tokens_p.mean_in_band": 0.6865885416666667, "tokens_rate.above_band": 0.9812967581047382, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018703241895261846 }, { "epoch": 1.648936170212766, "grad_norm": 102.08126046801257, "learning_rate": 1.6308671142293845e-07, "loss": 0.3502, "step": 9920, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.5781710914454278, "success_rate.epoch.env.math": 0.9766355140186916, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.9578220858895705, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5944675891077332, "success_rate.epoch.global": 0.8538639365918098, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9941275167785235, "tokens_p.mean_in_band": 0.53125, "tokens_rate.above_band": 0.9977678571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002232142857142857 }, { "epoch": 1.6497672872340425, "grad_norm": 35.110625670913386, "learning_rate": 1.630725061401173e-07, "loss": 0.2041, "step": 9925, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4444444444444444, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.5781710914454278, "success_rate.epoch.env.math": 0.9766355140186916, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.9576530612244898, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5944522232290895, "success_rate.epoch.global": 0.853891820580475, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995954425363276, "tokens_p.mean_in_band": 0.6505533854166666, "tokens_rate.above_band": 0.969270166453265, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030729833546734954 }, { "epoch": 1.650598404255319, "grad_norm": 102.56214821476766, "learning_rate": 1.6305833089945072e-07, "loss": 0.2444, "step": 9930, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4482758620689655, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.46853146853146854, "success_rate.epoch.env.logic": 0.5781710914454278, "success_rate.epoch.env.math": 0.9766355140186916, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.9576962283384302, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5948044582053134, "success_rate.epoch.global": 0.8540121931125392, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9928745928338762, "tokens_p.mean_in_band": 0.732421875, "tokens_rate.above_band": 0.9935275080906149, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006472491909385114 }, { "epoch": 1.6514295212765957, "grad_norm": 195.47797474383552, "learning_rate": 1.6304418572609623e-07, "loss": 0.2179, "step": 9935, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4520547945205479, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4722222222222222, "success_rate.epoch.env.logic": 0.5781710914454278, "success_rate.epoch.env.math": 0.9766627771295215, "success_rate.epoch.env.sat": 0.099601593625498, "success_rate.epoch.env.science": 0.9574847250509165, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5954667714749181, "success_rate.epoch.global": 0.8540157998683344, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.6666666666666666, "success_rate.window.env_macro_mean": 0.9333333333333332, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9891895085066162, "tokens_p.mean_in_band": 0.7721354166666666, "tokens_rate.above_band": 0.9592021758839528, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04079782411604715 }, { "epoch": 1.6522606382978724, "grad_norm": 118.87718007106503, "learning_rate": 1.6303007064515812e-07, "loss": 0.2728, "step": 9940, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4520547945205479, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4722222222222222, "success_rate.epoch.env.logic": 0.5781710914454278, "success_rate.epoch.env.math": 0.9766899766899767, "success_rate.epoch.env.sat": 0.0992063492063492, "success_rate.epoch.env.science": 0.9575063613231553, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5954352797852404, "success_rate.epoch.global": 0.8539473684210527, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9930334394904459, "tokens_p.mean_in_band": 0.6655815972222222, "tokens_rate.above_band": 0.8971428571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10285714285714286 }, { "epoch": 1.6530917553191489, "grad_norm": 60.66221421228457, "learning_rate": 1.6301598568168717e-07, "loss": 0.465, "step": 9945, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4520547945205479, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47586206896551725, "success_rate.epoch.env.logic": 0.5787923416789397, "success_rate.epoch.env.math": 0.9766899766899767, "success_rate.epoch.env.sat": 0.0992063492063492, "success_rate.epoch.env.science": 0.9572845156369184, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5958024844480194, "success_rate.epoch.global": 0.8539030402629416, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977050388635754, "tokens_p.mean_below_band": 1.0058283805847168e-06, "tokens_p.mean_in_band": 0.5398448114809783, "tokens_rate.above_band": 0.9418149690773697, "tokens_rate.below_band": 0.00012621481761958855, "tokens_rate.in_band": 0.05805881610501073 }, { "epoch": 1.6539228723404256, "grad_norm": 52.79032072014623, "learning_rate": 1.6300193086068074e-07, "loss": 0.2674, "step": 9950, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4520547945205479, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47586206896551725, "success_rate.epoch.env.logic": 0.5770925110132159, "success_rate.epoch.env.math": 0.9766899766899767, "success_rate.epoch.env.sat": 0.0992063492063492, "success_rate.epoch.env.science": 0.9573062261753494, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.595649928072811, "success_rate.epoch.global": 0.8536705534570537, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.611111111111111, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9967152412804586, "tokens_p.mean_in_band": 0.6524251302083334, "tokens_rate.above_band": 0.9886632026452528, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011336797354747285 }, { "epoch": 1.6547539893617023, "grad_norm": 103.80170698926067, "learning_rate": 1.629879062070827e-07, "loss": 0.2125, "step": 9955, "success_rate.epoch.env.abd": 0.5853658536585366, "success_rate.epoch.env.agentgym:alfworld": 0.4520547945205479, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47586206896551725, "success_rate.epoch.env.logic": 0.5770925110132159, "success_rate.epoch.env.math": 0.9766899766899767, "success_rate.epoch.env.sat": 0.10276679841897234, "success_rate.epoch.env.science": 0.9573604060913705, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5959785307208696, "success_rate.epoch.global": 0.8538146021328958, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944196428571429, "tokens_p.mean_in_band": 0.6907894736842105, "tokens_rate.above_band": 0.9218106995884774, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07818930041152264 }, { "epoch": 1.6555851063829787, "grad_norm": 53.56394451332157, "learning_rate": 1.6297391174578338e-07, "loss": 0.1578, "step": 9960, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47586206896551725, "success_rate.epoch.env.logic": 0.5770925110132159, "success_rate.epoch.env.math": 0.9766899766899767, "success_rate.epoch.env.sat": 0.10276679841897234, "success_rate.epoch.env.science": 0.9573820395738204, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.597216839217836, "success_rate.epoch.global": 0.853910477127398, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9898330479452054, "tokens_p.mean_below_band": 8.754432201385498e-08, "tokens_p.mean_in_band": 0.8407738095238095, "tokens_rate.above_band": 0.9636963696369637, "tokens_rate.below_band": 0.0016501650165016502, "tokens_rate.in_band": 0.034653465346534656 }, { "epoch": 1.6564162234042552, "grad_norm": 74.24214927650222, "learning_rate": 1.6295994750161947e-07, "loss": 0.3009, "step": 9965, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.5, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4726027397260274, "success_rate.epoch.env.logic": 0.5770925110132159, "success_rate.epoch.env.math": 0.9767171129220024, "success_rate.epoch.env.sat": 0.10236220472440945, "success_rate.epoch.env.science": 0.9573928480852143, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5968872048368693, "success_rate.epoch.global": 0.853678518761265, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9858751846381093, "tokens_p.mean_below_band": 9.424984455108643e-07, "tokens_p.mean_in_band": 0.7144990808823529, "tokens_rate.above_band": 0.8316953316953317, "tokens_rate.below_band": 0.0012285012285012285, "tokens_rate.in_band": 0.16707616707616707 }, { "epoch": 1.657247340425532, "grad_norm": 28.241062189574908, "learning_rate": 1.6294601349937413e-07, "loss": 0.2326, "step": 9970, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4726027397260274, "success_rate.epoch.env.logic": 0.5770925110132159, "success_rate.epoch.env.math": 0.9767171129220024, "success_rate.epoch.env.sat": 0.10116731517509728, "success_rate.epoch.env.science": 0.9574036511156186, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5902860541143713, "success_rate.epoch.global": 0.8531434184675835, "success_rate.window.env.agentgym:sciworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 1.0, "tokens_p.mean_below_band": 5.893525667488575e-10, "tokens_p.mean_in_band": 0.6446759259259259, "tokens_rate.above_band": 0.9535472972972973, "tokens_rate.below_band": 0.0008445945945945946, "tokens_rate.in_band": 0.04560810810810811 }, { "epoch": 1.6580784574468086, "grad_norm": 168.5435991546668, "learning_rate": 1.629321097637768e-07, "loss": 0.215, "step": 9975, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5770925110132159, "success_rate.epoch.env.math": 0.9767981438515081, "success_rate.epoch.env.sat": 0.10116731517509728, "success_rate.epoch.env.science": 0.9574468085106383, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5906235018224598, "success_rate.epoch.global": 0.8533355134074558, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9880181347150259, "tokens_p.mean_below_band": 1.2514647096395493e-09, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9922879177377892, "tokens_rate.below_band": 0.002570694087403599, "tokens_rate.in_band": 0.005141388174807198 }, { "epoch": 1.658909574468085, "grad_norm": 55.94921874535783, "learning_rate": 1.6291823631950322e-07, "loss": 0.2574, "step": 9980, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5762463343108505, "success_rate.epoch.env.math": 0.9767981438515081, "success_rate.epoch.env.sat": 0.10116731517509728, "success_rate.epoch.env.science": 0.9574791192103265, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5905495140040347, "success_rate.epoch.global": 0.8532679738562091, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992559523809523, "tokens_p.mean_in_band": 0.44375, "tokens_rate.above_band": 0.9853372434017595, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01466275659824047 }, { "epoch": 1.6597406914893615, "grad_norm": 69.42549739576562, "learning_rate": 1.629043931911753e-07, "loss": 0.2439, "step": 9985, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.575402635431918, "success_rate.epoch.env.math": 0.9768518518518519, "success_rate.epoch.env.sat": 0.10116731517509728, "success_rate.epoch.env.science": 0.9575113808801214, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.590480629530508, "success_rate.epoch.global": 0.8532484492327783, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947323462414579, "tokens_p.mean_in_band": 0.6727430555555556, "tokens_rate.above_band": 0.9701657458563536, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02983425414364641 }, { "epoch": 1.6605718085106385, "grad_norm": 21.98729136609232, "learning_rate": 1.6289058040336123e-07, "loss": 0.251, "step": 9990, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.575402635431918, "success_rate.epoch.env.math": 0.9768518518518519, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.9575328614762386, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5904115627857007, "success_rate.epoch.global": 0.8530179445350734, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9968622795115333, "tokens_p.mean_in_band": 0.6169871794871795, "tokens_rate.above_band": 0.9497422680412371, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05025773195876289 }, { "epoch": 1.661402925531915, "grad_norm": 121.54956901877497, "learning_rate": 1.6287679798057533e-07, "loss": 0.2917, "step": 9995, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5745614035087719, "success_rate.epoch.env.math": 0.9768518518518519, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.9575971731448764, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5903409336716545, "success_rate.epoch.global": 0.8530226495030145, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981786809815951, "tokens_p.mean_below_band": 1.6916601452976465e-10, "tokens_p.mean_in_band": 0.544921875, "tokens_rate.above_band": 0.9775112443778111, "tokens_rate.below_band": 0.0014992503748125937, "tokens_rate.in_band": 0.020989505247376312 }, { "epoch": 1.6622340425531914, "grad_norm": 62.16036825763515, "learning_rate": 1.6286304594727802e-07, "loss": 0.2061, "step": 10000, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5751824817518248, "success_rate.epoch.env.math": 0.9768518518518519, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.9576292559899118, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5904003119523898, "success_rate.epoch.global": 0.8531183846279108, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9950244413407822, "tokens_p.mean_in_band": 0.859375, "tokens_rate.above_band": 0.9808219178082191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019178082191780823 }, { "epoch": 1.663065159574468, "grad_norm": 182.5395723992753, "learning_rate": 1.628493243278758e-07, "loss": 0.2208, "step": 10005, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5743440233236151, "success_rate.epoch.env.math": 0.9768518518518519, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.9576719576719577, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5903279704300112, "success_rate.epoch.global": 0.8530751708428246, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980755131964809, "tokens_p.mean_in_band": 0.7940340909090909, "tokens_rate.above_band": 0.9841269841269841, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015873015873015872 }, { "epoch": 1.6638962765957448, "grad_norm": 62.62513574123092, "learning_rate": 1.6283563314672118e-07, "loss": 0.2351, "step": 10010, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5743440233236151, "success_rate.epoch.env.math": 0.9769585253456221, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.9577039274924471, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5903405743676712, "success_rate.epoch.global": 0.8532423208191127, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9896449704142012, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9941176470588236, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0058823529411764705 }, { "epoch": 1.6647273936170213, "grad_norm": 129.17829421785518, "learning_rate": 1.628219724281126e-07, "loss": 0.2205, "step": 10015, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5743440233236151, "success_rate.epoch.env.math": 0.9770114942528736, "success_rate.epoch.env.sat": 0.10038610038610038, "success_rate.epoch.env.science": 0.9577571033442294, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5903502238912197, "success_rate.epoch.global": 0.8534090909090909, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9917452830188679, "tokens_p.mean_in_band": 0.577392578125, "tokens_rate.above_band": 0.8688524590163934, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13114754098360656 }, { "epoch": 1.6655585106382977, "grad_norm": 34.35706374232816, "learning_rate": 1.6280834219629457e-07, "loss": 0.1444, "step": 10020, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5743440233236151, "success_rate.epoch.env.math": 0.9770114942528736, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9578101456554495, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5903199458844125, "success_rate.epoch.global": 0.8533895556276354, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993408203125, "tokens_p.mean_in_band": 0.6596354166666667, "tokens_rate.above_band": 0.9446494464944649, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.055350553505535055 }, { "epoch": 1.6663896276595744, "grad_norm": 59.36509805064336, "learning_rate": 1.627947424754574e-07, "loss": 0.2737, "step": 10025, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4557823129251701, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5749636098981077, "success_rate.epoch.env.math": 0.9770114942528736, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9578524836929252, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5903801208491368, "success_rate.epoch.global": 0.8535083454869551, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9884026508226691, "tokens_p.mean_in_band": 0.5456517269736842, "tokens_rate.above_band": 0.7421981004070556, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.25780189959294436 }, { "epoch": 1.6672207446808511, "grad_norm": 126.21370400205834, "learning_rate": 1.627811732897372e-07, "loss": 0.2022, "step": 10030, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5747460087082729, "success_rate.epoch.env.math": 0.9770114942528736, "success_rate.epoch.env.sat": 0.09961685823754789, "success_rate.epoch.env.science": 0.9578736208625878, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5906617154629246, "success_rate.epoch.global": 0.8533268576979116, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9981194690265487, "tokens_p.mean_in_band": 0.627197265625, "tokens_rate.above_band": 0.9724612736660929, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.027538726333907058 }, { "epoch": 1.6680518617021276, "grad_norm": 86.34771415370268, "learning_rate": 1.6276763466321599e-07, "loss": 0.3635, "step": 10035, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5753623188405798, "success_rate.epoch.env.math": 0.9770114942528736, "success_rate.epoch.env.sat": 0.09961685823754789, "success_rate.epoch.env.science": 0.9576759328825445, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5906997720222212, "success_rate.epoch.global": 0.8533074559275433, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9973783783783784, "tokens_p.mean_in_band": 0.5087890625, "tokens_rate.above_band": 0.996982108212977, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003017891787023065 }, { "epoch": 1.6688829787234043, "grad_norm": 61.34112800208512, "learning_rate": 1.627541266199215e-07, "loss": 0.2135, "step": 10040, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5745296671490593, "success_rate.epoch.env.math": 0.9770378874856487, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9577077077077077, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5909417804988917, "success_rate.epoch.global": 0.8532880917757312, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950703017832647, "tokens_p.mean_below_band": 2.5331974029541016e-07, "tokens_p.mean_in_band": 0.7449776785714286, "tokens_rate.above_band": 0.989145183175034, "tokens_rate.below_band": 0.0013568521031207597, "tokens_rate.in_band": 0.009497964721845319 }, { "epoch": 1.669714095744681, "grad_norm": 21.816264318816746, "learning_rate": 1.6274064918382726e-07, "loss": 0.2009, "step": 10045, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5745296671490593, "success_rate.epoch.env.math": 0.9770378874856487, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9577605598600349, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5909465852400123, "success_rate.epoch.global": 0.853406522441072, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9951117318435754, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9944444444444445, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005555555555555556 }, { "epoch": 1.6705452127659575, "grad_norm": 242.47526879546342, "learning_rate": 1.6272720237885236e-07, "loss": 0.2633, "step": 10050, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5745296671490593, "success_rate.epoch.env.math": 0.9770642201834863, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9578027465667915, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5909528142767936, "success_rate.epoch.global": 0.8535247620583966, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.998876953125, "tokens_p.mean_in_band": 0.2304322429906542, "tokens_rate.above_band": 0.7995003123048094, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.2004996876951905 }, { "epoch": 1.671376329787234, "grad_norm": 46.722616071256425, "learning_rate": 1.6271378622886158e-07, "loss": 0.2906, "step": 10055, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47619047619047616, "success_rate.epoch.env.logic": 0.5736994219653179, "success_rate.epoch.env.math": 0.977116704805492, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9578343313373253, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5908849801139571, "success_rate.epoch.global": 0.853505237711523, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9975400874635568, "tokens_p.mean_in_band": 0.6324404761904762, "tokens_rate.above_band": 0.9702970297029703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0297029702970297 }, { "epoch": 1.6722074468085106, "grad_norm": 34.4290066472054, "learning_rate": 1.6270040075766532e-07, "loss": 0.275, "step": 10060, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5736994219653179, "success_rate.epoch.env.math": 0.9771428571428571, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9576376775479691, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5911912303037081, "success_rate.epoch.global": 0.8535093367675467, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950559701492537, "tokens_p.mean_below_band": 2.2351741790771484e-07, "tokens_p.mean_in_band": 0.8046875, "tokens_rate.above_band": 0.9852941176470589, "tokens_rate.below_band": 0.0029411764705882353, "tokens_rate.in_band": 0.011764705882352941 }, { "epoch": 1.6730385638297873, "grad_norm": 286.8431186446181, "learning_rate": 1.6268704598901948e-07, "loss": 0.4636, "step": 10065, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5736994219653179, "success_rate.epoch.env.math": 0.9771428571428571, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9576693227091634, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.591194107136544, "success_rate.epoch.global": 0.8536036036036037, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9937616333178222, "tokens_p.mean_in_band": 0.5348777770996094, "tokens_rate.above_band": 0.8935550935550935, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10644490644490645 }, { "epoch": 1.6738696808510638, "grad_norm": 47.635321748134814, "learning_rate": 1.6267372194662553e-07, "loss": 0.1339, "step": 10070, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5743145743145743, "success_rate.epoch.env.math": 0.9771949828962372, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9577009206270216, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5912576413202254, "success_rate.epoch.global": 0.8537447765991643, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9911380597014925, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9804878048780488, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01951219512195122 }, { "epoch": 1.6747007978723403, "grad_norm": 145.78876682012896, "learning_rate": 1.626604286541304e-07, "loss": 0.1893, "step": 10075, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5749279538904899, "success_rate.epoch.env.math": 0.9771949828962372, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9577534791252486, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5913181811451474, "success_rate.epoch.global": 0.8538856775850996, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9951923076923077, "tokens_p.mean_in_band": 0.69287109375, "tokens_rate.above_band": 0.9391634980988594, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.060836501901140684 }, { "epoch": 1.675531914893617, "grad_norm": 36.710432490602784, "learning_rate": 1.626471661351264e-07, "loss": 0.2268, "step": 10080, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5732758620689655, "success_rate.epoch.env.math": 0.9772209567198178, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9575471698113207, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5911515968440682, "success_rate.epoch.global": 0.8535685645549318, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9984214469178082, "tokens_p.mean_below_band": 9.74978320300579e-10, "tokens_p.mean_in_band": 0.5121837797619048, "tokens_rate.above_band": 0.9644921552436003, "tokens_rate.below_band": 0.0008257638315441783, "tokens_rate.in_band": 0.03468208092485549 }, { "epoch": 1.6763630319148937, "grad_norm": 409.60930771746746, "learning_rate": 1.6263393441315128e-07, "loss": 0.2764, "step": 10085, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5730659025787965, "success_rate.epoch.env.math": 0.9772468714448237, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9575787645745473, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5911377377529831, "success_rate.epoch.global": 0.853549110719436, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996875, "tokens_p.mean_in_band": 0.6640625, "tokens_rate.above_band": 0.9773371104815864, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0226628895184136 }, { "epoch": 1.6771941489361701, "grad_norm": 69.08792672327564, "learning_rate": 1.6262073351168805e-07, "loss": 0.2881, "step": 10090, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5722460658082976, "success_rate.epoch.env.math": 0.9772727272727273, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9576208178438662, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5910693806917761, "success_rate.epoch.global": 0.8535296942532415, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944098240469208, "tokens_p.mean_in_band": 0.59375, "tokens_rate.above_band": 0.9855491329479769, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014450867052023121 }, { "epoch": 1.6780252659574468, "grad_norm": 29.666642928477334, "learning_rate": 1.6260756345416514e-07, "loss": 0.2498, "step": 10095, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9772727272727273, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9576732673267326, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.590999831155698, "success_rate.epoch.global": 0.8535103150487766, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945488721804512, "tokens_p.mean_in_band": 0.7315848214285714, "tokens_rate.above_band": 0.9895833333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010416666666666666 }, { "epoch": 1.6788563829787235, "grad_norm": 59.49638031278995, "learning_rate": 1.6259442426395612e-07, "loss": 0.2152, "step": 10100, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9772727272727273, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9574678536102869, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5909811571814756, "success_rate.epoch.global": 0.8534441425603324, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957983193277311, "tokens_p.mean_below_band": 8.149072527885437e-09, "tokens_p.mean_in_band": 0.49609375, "tokens_rate.above_band": 0.9916666666666667, "tokens_rate.below_band": 0.004166666666666667, "tokens_rate.in_band": 0.004166666666666667 }, { "epoch": 1.6796875, "grad_norm": 11.649435299692804, "learning_rate": 1.6258131596437986e-07, "loss": 0.2046, "step": 10105, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4594594594594595, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9772727272727273, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9574783683559951, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5909821130674491, "success_rate.epoch.global": 0.8534675615212528, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966694078947368, "tokens_p.mean_in_band": 0.6659007352941176, "tokens_rate.above_band": 0.9781209781209781, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021879021879021878 }, { "epoch": 1.6805186170212765, "grad_norm": 69.37480691445676, "learning_rate": 1.6256823857870037e-07, "loss": 0.2424, "step": 10110, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4797297297297297, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9772727272727273, "success_rate.epoch.env.sat": 0.10305343511450382, "success_rate.epoch.env.science": 0.9575518262586377, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5907084619192088, "success_rate.epoch.global": 0.8534950526651771, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9929078014184397, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.9825783972125436, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.017421602787456445 }, { "epoch": 1.6813497340425532, "grad_norm": 54.02517472302923, "learning_rate": 1.6255519213012684e-07, "loss": 0.3338, "step": 10115, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47651006711409394, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9772985244040863, "success_rate.epoch.env.sat": 0.10266159695817491, "success_rate.epoch.env.science": 0.9575832305795314, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5903853437992349, "success_rate.epoch.global": 0.8533163265306123, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.99124430523918, "tokens_p.mean_in_band": 0.5678310706967213, "tokens_rate.above_band": 0.8436899423446509, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15631005765534914 }, { "epoch": 1.6821808510638299, "grad_norm": 146.83583222193596, "learning_rate": 1.6254217664181346e-07, "loss": 0.2577, "step": 10120, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47651006711409394, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9773242630385488, "success_rate.epoch.env.sat": 0.10266159695817491, "success_rate.epoch.env.science": 0.9576145884672252, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5903905343921583, "success_rate.epoch.global": 0.8534098151688974, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.000234962406015, "tokens_p.mean_below_band": 2.384185791015625e-07, "tokens_p.mean_in_band": 0.6331521739130435, "tokens_rate.above_band": 0.9708029197080292, "tokens_rate.below_band": 0.0012165450121654502, "tokens_rate.in_band": 0.027980535279805353 }, { "epoch": 1.6830119680851063, "grad_norm": 0.0, "learning_rate": 1.625291921368596e-07, "loss": 0.3413, "step": 10125, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47651006711409394, "success_rate.epoch.env.logic": 0.5714285714285714, "success_rate.epoch.env.math": 0.9773242630385488, "success_rate.epoch.env.sat": 0.10266159695817491, "success_rate.epoch.env.science": 0.9576771653543307, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.590396223200077, "success_rate.epoch.global": 0.8535498248965298, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99140625, "tokens_p.mean_in_band": 0.8859375, "tokens_rate.above_band": 0.9795918367346939, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02040816326530612 }, { "epoch": 1.683843085106383, "grad_norm": 66.18924581622116, "learning_rate": 1.6251623863830954e-07, "loss": 0.2457, "step": 10130, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47651006711409394, "success_rate.epoch.env.logic": 0.572039942938659, "success_rate.epoch.env.math": 0.9774011299435028, "success_rate.epoch.env.sat": 0.10266159695817491, "success_rate.epoch.env.science": 0.9576771653543307, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5904587903287172, "success_rate.epoch.global": 0.8536430162265352, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9966299019607843, "tokens_p.mean_in_band": 0.61796875, "tokens_rate.above_band": 0.9315068493150684, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0684931506849315 }, { "epoch": 1.6846742021276597, "grad_norm": 40.92045417282505, "learning_rate": 1.6250331616915255e-07, "loss": 0.2115, "step": 10135, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47651006711409394, "success_rate.epoch.env.logic": 0.572039942938659, "success_rate.epoch.env.math": 0.9774266365688488, "success_rate.epoch.env.sat": 0.10606060606060606, "success_rate.epoch.env.science": 0.9577083845586427, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5907729480498162, "success_rate.epoch.global": 0.8537593387378795, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9889794685990339, "tokens_p.mean_in_band": 0.73046875, "tokens_rate.above_band": 0.971830985915493, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028169014084507043 }, { "epoch": 1.6855053191489362, "grad_norm": 71.33398430746944, "learning_rate": 1.6249042475232288e-07, "loss": 0.2197, "step": 10140, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47651006711409394, "success_rate.epoch.env.logic": 0.572039942938659, "success_rate.epoch.env.math": 0.9774266365688488, "success_rate.epoch.env.sat": 0.10566037735849057, "success_rate.epoch.env.science": 0.9577395577395578, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5907393975478888, "success_rate.epoch.global": 0.853693407466243, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.994377444589309, "tokens_p.mean_in_band": 0.7024356617647058, "tokens_rate.above_band": 0.9783163265306123, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021683673469387755 }, { "epoch": 1.6863364361702127, "grad_norm": 263.2534420007352, "learning_rate": 1.6247756441069964e-07, "loss": 0.1926, "step": 10145, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.47651006711409394, "success_rate.epoch.env.logic": 0.572039942938659, "success_rate.epoch.env.math": 0.9774266365688488, "success_rate.epoch.env.sat": 0.10566037735849057, "success_rate.epoch.env.science": 0.9577914110429447, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5907441114845603, "success_rate.epoch.global": 0.8538095238095238, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9949596774193549, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.9908675799086758, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0091324200913242 }, { "epoch": 1.6871675531914894, "grad_norm": 82.19205191007491, "learning_rate": 1.6246473516710675e-07, "loss": 0.2647, "step": 10150, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48, "success_rate.epoch.env.logic": 0.5726495726495726, "success_rate.epoch.env.math": 0.9774520856820744, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9578328021573915, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5910867645173548, "success_rate.epoch.global": 0.8538363982244769, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9947677752293578, "tokens_p.mean_in_band": 0.5349392361111112, "tokens_rate.above_band": 0.960352422907489, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039647577092511016 }, { "epoch": 1.687998670212766, "grad_norm": 12.523020155808688, "learning_rate": 1.62451937044313e-07, "loss": 0.2109, "step": 10155, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48, "success_rate.epoch.env.logic": 0.5726495726495726, "success_rate.epoch.env.math": 0.9774520856820744, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9578534672874296, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.59108864316554, "success_rate.epoch.global": 0.8538827258320126, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9949006276150628, "tokens_p.mean_below_band": 4.926614671774132e-16, "tokens_p.mean_in_band": 0.68359375, "tokens_rate.above_band": 0.987603305785124, "tokens_rate.below_band": 0.004132231404958678, "tokens_rate.in_band": 0.008264462809917356 }, { "epoch": 1.6888297872340425, "grad_norm": 119.39907150208509, "learning_rate": 1.624391700650319e-07, "loss": 0.2037, "step": 10160, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48, "success_rate.epoch.env.logic": 0.5726495726495726, "success_rate.epoch.env.math": 0.9774774774774775, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9579050416054822, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5910956400849451, "success_rate.epoch.global": 0.8540215326155794, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9958916083916084, "tokens_p.mean_in_band": 0.7164713541666666, "tokens_rate.above_band": 0.9834938101788171, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016506189821182942 }, { "epoch": 1.689660904255319, "grad_norm": 42.26509537627254, "learning_rate": 1.6242643425192172e-07, "loss": 0.1724, "step": 10165, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48, "success_rate.epoch.env.logic": 0.5726495726495726, "success_rate.epoch.env.math": 0.9775028121484814, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.957956489855781, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5910667799835126, "success_rate.epoch.global": 0.8540249881385419, "success_rate.window.env.math": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9929298642533937, "tokens_p.mean_below_band": 2.983724378680108e-16, "tokens_p.mean_in_band": 0.5394287109375, "tokens_rate.above_band": 0.8700787401574803, "tokens_rate.below_band": 0.003937007874015748, "tokens_rate.in_band": 0.12598425196850394 }, { "epoch": 1.6904920212765957, "grad_norm": 28.70767043001444, "learning_rate": 1.6241372962758544e-07, "loss": 0.2155, "step": 10170, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48, "success_rate.epoch.env.logic": 0.5726495726495726, "success_rate.epoch.env.math": 0.9775280898876404, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.9577739809616793, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5910524862421543, "success_rate.epoch.global": 0.8540284360189574, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921875, "tokens_p.mean_in_band": 0.65625, "tokens_rate.above_band": 0.9655172413793104, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.034482758620689655 }, { "epoch": 1.6913231382978724, "grad_norm": 35.94883382158681, "learning_rate": 1.6240105621457062e-07, "loss": 0.1965, "step": 10175, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48, "success_rate.epoch.env.logic": 0.5726495726495726, "success_rate.epoch.env.math": 0.9775784753363229, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.9578254509995124, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5910617458318375, "success_rate.epoch.global": 0.8541896796591447, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9848790322580645, "tokens_p.mean_below_band": 1.5688783605583012e-11, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9763779527559056, "tokens_rate.below_band": 0.007874015748031496, "tokens_rate.in_band": 0.015748031496062992 }, { "epoch": 1.6921542553191489, "grad_norm": 31.281027107994543, "learning_rate": 1.6238841403536948e-07, "loss": 0.2562, "step": 10180, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48, "success_rate.epoch.env.logic": 0.5726495726495726, "success_rate.epoch.env.math": 0.9775784753363229, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.9578460038986355, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5910636142772123, "success_rate.epoch.global": 0.8542356838618078, "success_rate.window.env.abd": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9831288343558282, "tokens_p.mean_in_band": 0.6658653846153846, "tokens_rate.above_band": 0.7358916478555305, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.26410835214446954 }, { "epoch": 1.6929853723404256, "grad_norm": 63.50940821636169, "learning_rate": 1.6237580311241877e-07, "loss": 0.3285, "step": 10185, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5718349928876245, "success_rate.epoch.env.math": 0.9775784753363229, "success_rate.epoch.env.sat": 0.10486891385767791, "success_rate.epoch.env.science": 0.9578972986128012, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5913072891464387, "success_rate.epoch.global": 0.8542388906397731, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998769915651359, "tokens_p.mean_in_band": 0.47860054347826086, "tokens_rate.above_band": 0.9788990825688073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02110091743119266 }, { "epoch": 1.6938164893617023, "grad_norm": 24.172246707858598, "learning_rate": 1.623632234680998e-07, "loss": 0.2436, "step": 10190, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5718349928876245, "success_rate.epoch.env.math": 0.9776035834266518, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9579280155642024, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5912767912486523, "success_rate.epoch.global": 0.8541961895764446, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9892015706806283, "tokens_p.mean_in_band": 0.6983695652173914, "tokens_rate.above_band": 0.8925233644859814, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10747663551401869 }, { "epoch": 1.6946476063829787, "grad_norm": 75.7898734696799, "learning_rate": 1.6235067512473837e-07, "loss": 0.2681, "step": 10195, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.573049645390071, "success_rate.epoch.env.math": 0.9776035834266518, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9579791110031577, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5913918592433253, "success_rate.epoch.global": 0.8543567159484114, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9973487482614742, "tokens_p.mean_in_band": 0.6661305147058824, "tokens_rate.above_band": 0.9769021739130435, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02309782608695652 }, { "epoch": 1.6954787234042552, "grad_norm": 312.48961708992596, "learning_rate": 1.623381581046047e-07, "loss": 0.2542, "step": 10200, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5736543909348442, "success_rate.epoch.env.math": 0.9776035834266518, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9580300824842309, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5914514698820386, "success_rate.epoch.global": 0.8544940289126336, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940944881889764, "tokens_p.mean_in_band": 0.6376953125, "tokens_rate.above_band": 0.9407407407407408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05925925925925926 }, { "epoch": 1.696309840425532, "grad_norm": 45.30250931641802, "learning_rate": 1.6232567242991345e-07, "loss": 0.2029, "step": 10205, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5742574257425742, "success_rate.epoch.env.math": 0.9776035834266518, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.9580606060606061, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5914737576797351, "success_rate.epoch.global": 0.8544512482336316, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9908216783216783, "tokens_p.mean_below_band": 5.617039278149605e-09, "tokens_p.mean_in_band": 0.6668198529411765, "tokens_rate.above_band": 0.9407894736842105, "tokens_rate.below_band": 0.003289473684210526, "tokens_rate.in_band": 0.05592105263157895 }, { "epoch": 1.6971409574468086, "grad_norm": 500.92703051734617, "learning_rate": 1.6231321812282362e-07, "loss": 0.2372, "step": 10210, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5734463276836158, "success_rate.epoch.env.math": 0.9776035834266518, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.9581012351658997, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5914037150475837, "success_rate.epoch.global": 0.8544085346721054, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936926605504587, "tokens_p.mean_in_band": 0.7523082386363636, "tokens_rate.above_band": 0.9674556213017751, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03254437869822485 }, { "epoch": 1.697972074468085, "grad_norm": 30.399568209774646, "learning_rate": 1.6230079520543854e-07, "loss": 0.262, "step": 10215, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5734463276836158, "success_rate.epoch.env.math": 0.9776035834266518, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.9581519109820996, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5914083219399655, "success_rate.epoch.global": 0.8545226524533626, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971374045801527, "tokens_p.mean_in_band": 0.6201699746621622, "tokens_rate.above_band": 0.9340463458110517, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0659536541889483 }, { "epoch": 1.6988031914893615, "grad_norm": 63.983874781983076, "learning_rate": 1.6228840369980587e-07, "loss": 0.3167, "step": 10220, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4563758389261745, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5734463276836158, "success_rate.epoch.env.math": 0.9776035834266518, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.9581721470019342, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5914101615781323, "success_rate.epoch.global": 0.8545682494906754, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9881298449612403, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9923076923076923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007692307692307693 }, { "epoch": 1.6996343085106385, "grad_norm": 53.81705412365549, "learning_rate": 1.622760436279175e-07, "loss": 0.0836, "step": 10225, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4533333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5746478873239437, "success_rate.epoch.env.math": 0.9776536312849162, "success_rate.epoch.env.sat": 0.10408921933085502, "success_rate.epoch.env.science": 0.958192363460609, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5912491905203529, "success_rate.epoch.global": 0.8545710707576706, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996079335793358, "tokens_p.mean_in_band": 0.376953125, "tokens_rate.above_band": 0.9963235294117647, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003676470588235294 }, { "epoch": 1.700465425531915, "grad_norm": 434.85146269190784, "learning_rate": 1.622637150117095e-07, "loss": 0.3462, "step": 10230, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4533333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5746478873239437, "success_rate.epoch.env.math": 0.9777034559643255, "success_rate.epoch.env.sat": 0.1037037037037037, "success_rate.epoch.env.science": 0.9582528957528957, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5912241760971296, "success_rate.epoch.global": 0.8546193528216351, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9899364406779662, "tokens_p.mean_in_band": 0.649658203125, "tokens_rate.above_band": 0.917098445595855, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08290155440414508 }, { "epoch": 1.7012965425531914, "grad_norm": 167.78709232247775, "learning_rate": 1.6225141787306218e-07, "loss": 0.3321, "step": 10235, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4533333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5738396624472574, "success_rate.epoch.env.math": 0.9777777777777777, "success_rate.epoch.env.sat": 0.1037037037037037, "success_rate.epoch.env.science": 0.9582629674306393, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5911583732439032, "success_rate.epoch.global": 0.8545766947828803, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990126382306477, "tokens_p.mean_in_band": 0.4296875, "tokens_rate.above_band": 0.975346687211094, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02465331278890601 }, { "epoch": 1.702127659574468, "grad_norm": 175.3900408936531, "learning_rate": 1.622391522337999e-07, "loss": 0.3905, "step": 10240, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4533333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5738396624472574, "success_rate.epoch.env.math": 0.9778024417314095, "success_rate.epoch.env.sat": 0.1033210332103321, "success_rate.epoch.env.science": 0.9583132530120482, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5911303986113277, "success_rate.epoch.global": 0.854579497581526, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9915570175438596, "tokens_p.mean_in_band": 0.6846354166666667, "tokens_rate.above_band": 0.95, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05 }, { "epoch": 1.7029587765957448, "grad_norm": 136.28871503097866, "learning_rate": 1.6222691811569124e-07, "loss": 0.2561, "step": 10245, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4533333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.574438202247191, "success_rate.epoch.env.math": 0.9778516057585825, "success_rate.epoch.env.sat": 0.1033210332103321, "success_rate.epoch.env.science": 0.9583734359961501, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5911947519578011, "success_rate.epoch.global": 0.854783421626675, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9938271604938271, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7037898936170213, "grad_norm": 8.77574415806428, "learning_rate": 1.6221471554044868e-07, "loss": 0.2716, "step": 10250, "success_rate.epoch.env.abd": 0.5952380952380952, "success_rate.epoch.env.agentgym:alfworld": 0.4533333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.574438202247191, "success_rate.epoch.env.math": 0.9779005524861878, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.9584034623707622, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5911673988998442, "success_rate.epoch.global": 0.8547633872976339, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9933805418719212, "tokens_p.mean_below_band": 1.0477378964424133e-08, "tokens_p.mean_in_band": 0.6450892857142857, "tokens_rate.above_band": 0.875, "tokens_rate.below_band": 0.004310344827586207, "tokens_rate.in_band": 0.1206896551724138 }, { "epoch": 1.7046210106382977, "grad_norm": 39.22581444306054, "learning_rate": 1.6220254452972885e-07, "loss": 0.2501, "step": 10255, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4533333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5742296918767507, "success_rate.epoch.env.math": 0.977924944812362, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.958423455900024, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5920082119032622, "success_rate.epoch.global": 0.8547433903576983, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961269744835966, "tokens_p.mean_in_band": 0.671875, "tokens_rate.above_band": 0.986810551558753, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013189448441247002 }, { "epoch": 1.7054521276595744, "grad_norm": 57.89307138942302, "learning_rate": 1.6219040510513229e-07, "loss": 0.1655, "step": 10260, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4533333333333333, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5742296918767507, "success_rate.epoch.env.math": 0.977924944812362, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.9584633853541417, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5920118418536365, "success_rate.epoch.global": 0.8548336959900529, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973821989528796, "tokens_p.mean_in_band": 0.7799479166666666, "tokens_rate.above_band": 0.9922077922077922, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007792207792207792 }, { "epoch": 1.7062832446808511, "grad_norm": 31.96955060423507, "learning_rate": 1.6217829728820355e-07, "loss": 0.4077, "step": 10265, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.45695364238410596, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5734265734265734, "success_rate.epoch.env.math": 0.9779735682819384, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.9584932821497121, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5922750882959767, "success_rate.epoch.global": 0.8548362055581431, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960801393728222, "tokens_p.mean_in_band": 0.6644345238095238, "tokens_rate.above_band": 0.9761904761904762, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023809523809523808 }, { "epoch": 1.7071143617021276, "grad_norm": 43.90933398727875, "learning_rate": 1.6216622110043095e-07, "loss": 0.2693, "step": 10270, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5734265734265734, "success_rate.epoch.env.math": 0.9779735682819384, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.9585231359386238, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.592602590768184, "success_rate.epoch.global": 0.8549262994569433, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9950819672131147, "tokens_p.mean_below_band": 3.213062882423401e-08, "tokens_rate.above_band": 0.9983633387888707, "tokens_rate.below_band": 0.0016366612111292963, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7079454787234043, "grad_norm": 71.79104818520806, "learning_rate": 1.6215417656324685e-07, "loss": 0.2196, "step": 10275, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48344370860927155, "success_rate.epoch.env.logic": 0.5740223463687151, "success_rate.epoch.env.math": 0.977997799779978, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.958552946813608, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5926616648877444, "success_rate.epoch.global": 0.8550387596899225, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9926609848484849, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.708776595744681, "grad_norm": 244.5435219242203, "learning_rate": 1.6214216369802734e-07, "loss": 0.1956, "step": 10280, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4868421052631579, "success_rate.epoch.env.logic": 0.5738161559888579, "success_rate.epoch.env.math": 0.977997799779978, "success_rate.epoch.env.sat": 0.10294117647058823, "success_rate.epoch.env.science": 0.9585827148671295, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5929545716447945, "success_rate.epoch.global": 0.8550185873605948, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976662777129521, "tokens_p.mean_in_band": 0.5078125, "tokens_rate.above_band": 0.9884659746251442, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011534025374855825 }, { "epoch": 1.7096077127659575, "grad_norm": 34.44395721733804, "learning_rate": 1.6213018252609225e-07, "loss": 0.301, "step": 10285, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4868421052631579, "success_rate.epoch.env.logic": 0.5738161559888579, "success_rate.epoch.env.math": 0.978021978021978, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.958642122878317, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5929278909490402, "success_rate.epoch.global": 0.8550433168316832, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981831395348837, "tokens_p.mean_in_band": 0.66875, "tokens_rate.above_band": 0.958217270194986, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04178272980501393 }, { "epoch": 1.710438829787234, "grad_norm": 116.68288408940043, "learning_rate": 1.621182330687053e-07, "loss": 0.2429, "step": 10290, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4868421052631579, "success_rate.epoch.env.logic": 0.5738161559888579, "success_rate.epoch.env.math": 0.9780461031833151, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.9586717630195891, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5929327787038228, "success_rate.epoch.global": 0.8551329622758194, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.990234375, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7112699468085106, "grad_norm": 114.45902887762509, "learning_rate": 1.6210631534707383e-07, "loss": 0.47, "step": 10295, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.42857142857142855, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4868421052631579, "success_rate.epoch.env.logic": 0.5738161559888579, "success_rate.epoch.env.math": 0.9780701754385965, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.9587013607066126, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5929376577894869, "success_rate.epoch.global": 0.8552224969097652, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9919407894736842, "tokens_p.mean_in_band": 0.5783203125, "tokens_rate.above_band": 0.9047619047619048, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09523809523809523 }, { "epoch": 1.7121010638297873, "grad_norm": 93.52849488096403, "learning_rate": 1.6209442938234884e-07, "loss": 0.3634, "step": 10300, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4868421052631579, "success_rate.epoch.env.logic": 0.5738161559888579, "success_rate.epoch.env.math": 0.9780941949616648, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.9587407584068686, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5964066264548108, "success_rate.epoch.global": 0.855356591540599, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9926714601769911, "tokens_p.mean_in_band": 0.8203125, "tokens_rate.above_band": 0.9658119658119658, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03418803418803419 }, { "epoch": 1.7129321808510638, "grad_norm": 89.39181197037155, "learning_rate": 1.6208257519562502e-07, "loss": 0.4059, "step": 10305, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4868421052631579, "success_rate.epoch.env.logic": 0.5736111111111111, "success_rate.epoch.env.math": 0.9780941949616648, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.958541815582559, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5963699003000784, "success_rate.epoch.global": 0.8551819864281308, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.7857142857142857, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995435393258427, "tokens_p.mean_in_band": 0.7138157894736842, "tokens_rate.above_band": 0.9740082079343365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025991792065663474 }, { "epoch": 1.7137632978723403, "grad_norm": 20.732901057952994, "learning_rate": 1.6207075280794068e-07, "loss": 0.3115, "step": 10310, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48366013071895425, "success_rate.epoch.env.logic": 0.5736111111111111, "success_rate.epoch.env.math": 0.9780941949616648, "success_rate.epoch.env.sat": 0.10256410256410256, "success_rate.epoch.env.science": 0.9585714285714285, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5960833219768662, "success_rate.epoch.global": 0.8551171393341553, "success_rate.window.env.ded": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9855603448275863, "tokens_p.mean_below_band": 6.230038707144558e-11, "tokens_p.mean_in_band": 0.5818452380952381, "tokens_rate.above_band": 0.7604895104895105, "tokens_rate.below_band": 0.0008741258741258741, "tokens_rate.in_band": 0.23863636363636365 }, { "epoch": 1.714594414893617, "grad_norm": 106.56519575102756, "learning_rate": 1.6205896224027763e-07, "loss": 0.2949, "step": 10315, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48366013071895425, "success_rate.epoch.env.logic": 0.5736111111111111, "success_rate.epoch.env.math": 0.9781181619256017, "success_rate.epoch.env.sat": 0.10181818181818182, "success_rate.epoch.env.science": 0.9586108468125595, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5960212732913341, "success_rate.epoch.global": 0.8549653579676675, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.6111111111111112, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9947660098522167, "tokens_p.mean_below_band": 4.3655745685100555e-09, "tokens_p.mean_in_band": 0.6572916666666667, "tokens_rate.above_band": 0.8675213675213675, "tokens_rate.below_band": 0.004273504273504274, "tokens_rate.in_band": 0.1282051282051282 }, { "epoch": 1.7154255319148937, "grad_norm": 102.50648502958545, "learning_rate": 1.6204720351356125e-07, "loss": 0.2732, "step": 10320, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.48366013071895425, "success_rate.epoch.env.logic": 0.5736111111111111, "success_rate.epoch.env.math": 0.9781181619256017, "success_rate.epoch.env.sat": 0.10144927536231885, "success_rate.epoch.env.science": 0.9586501901140685, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5959913130045748, "success_rate.epoch.global": 0.8549230769230769, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9971969696969697, "tokens_p.mean_in_band": 0.6460822610294118, "tokens_rate.above_band": 0.9604190919674039, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03958090803259604 }, { "epoch": 1.7162566489361701, "grad_norm": 101.46431613221785, "learning_rate": 1.6203547664866039e-07, "loss": 0.2174, "step": 10325, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.487012987012987, "success_rate.epoch.env.logic": 0.5736111111111111, "success_rate.epoch.env.math": 0.9781181619256017, "success_rate.epoch.env.sat": 0.10108303249097472, "success_rate.epoch.env.science": 0.9587188612099644, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5962690661426279, "success_rate.epoch.global": 0.8549700414810263, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9938969258589512, "tokens_p.mean_in_band": 0.7200520833333334, "tokens_rate.above_band": 0.968476357267951, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03152364273204904 }, { "epoch": 1.7170877659574468, "grad_norm": 135.6309527122214, "learning_rate": 1.620237816663874e-07, "loss": 0.2315, "step": 10330, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.487012987012987, "success_rate.epoch.env.logic": 0.5736111111111111, "success_rate.epoch.env.math": 0.9781420765027322, "success_rate.epoch.env.sat": 0.10108303249097472, "success_rate.epoch.env.science": 0.958757999525954, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5962747982238206, "success_rate.epoch.global": 0.855081363217685, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9896907216494846, "tokens_p.mean_in_band": 0.7215909090909091, "tokens_rate.above_band": 0.9463414634146341, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05365853658536585 }, { "epoch": 1.7179188829787235, "grad_norm": 78.59761609004616, "learning_rate": 1.6201211858749794e-07, "loss": 0.4263, "step": 10335, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.487012987012987, "success_rate.epoch.env.logic": 0.574792243767313, "success_rate.epoch.env.math": 0.9781420765027322, "success_rate.epoch.env.sat": 0.10108303249097472, "success_rate.epoch.env.science": 0.9587970636987924, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5963857252082788, "success_rate.epoch.global": 0.8552147239263803, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9918154761904762, "tokens_p.mean_in_band": 0.8854166666666666, "tokens_rate.above_band": 0.9739130434782609, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02608695652173913 }, { "epoch": 1.71875, "grad_norm": 60.52785511157568, "learning_rate": 1.6200048743269118e-07, "loss": 0.1425, "step": 10340, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.487012987012987, "success_rate.epoch.env.logic": 0.574792243767313, "success_rate.epoch.env.math": 0.9781420765027322, "success_rate.epoch.env.sat": 0.10108303249097472, "success_rate.epoch.env.science": 0.9588263132986276, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5963883842628093, "success_rate.epoch.global": 0.8552813122796259, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0004222972972974, "tokens_p.mean_in_band": 0.6685267857142857, "tokens_rate.above_band": 0.976897689768977, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0231023102310231 }, { "epoch": 1.7195811170212765, "grad_norm": 103.86867022095156, "learning_rate": 1.6198888822260952e-07, "loss": 0.323, "step": 10345, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.487012987012987, "success_rate.epoch.env.logic": 0.5753803596127247, "success_rate.epoch.env.math": 0.9781897491821155, "success_rate.epoch.env.sat": 0.10071942446043165, "success_rate.epoch.env.science": 0.9588457899716177, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5964148985507403, "success_rate.epoch.global": 0.8552611425945781, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.5599999999999999, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9885367298578199, "tokens_p.mean_below_band": 4.4411353883333504e-07, "tokens_p.mean_in_band": 0.5539869225543478, "tokens_rate.above_band": 0.7403508771929824, "tokens_rate.below_band": 0.0014035087719298245, "tokens_rate.in_band": 0.2582456140350877 }, { "epoch": 1.7204122340425532, "grad_norm": 274.99167270874347, "learning_rate": 1.6197732097783875e-07, "loss": 0.2658, "step": 10350, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.487012987012987, "success_rate.epoch.env.logic": 0.576551724137931, "success_rate.epoch.env.math": 0.9781897491821155, "success_rate.epoch.env.sat": 0.1003584229390681, "success_rate.epoch.env.science": 0.9588652482269504, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.596490336847029, "success_rate.epoch.global": 0.8552188552188552, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9960550742574258, "tokens_p.mean_in_band": 0.7421875, "tokens_rate.above_band": 0.9607609988109393, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039239001189060645 }, { "epoch": 1.7212433510638299, "grad_norm": 57.14280439553784, "learning_rate": 1.619657857189079e-07, "loss": 0.1345, "step": 10355, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4605263157894737, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49032258064516127, "success_rate.epoch.env.logic": 0.576551724137931, "success_rate.epoch.env.math": 0.9782135076252724, "success_rate.epoch.env.sat": 0.1003584229390681, "success_rate.epoch.env.science": 0.958913813459268, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.596797783874997, "success_rate.epoch.global": 0.855373796055649, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9863119834710744, "tokens_p.mean_in_band": 0.8367660984848485, "tokens_rate.above_band": 0.9361702127659575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06382978723404255 }, { "epoch": 1.7220744680851063, "grad_norm": 39.674688935714364, "learning_rate": 1.6195428246628921e-07, "loss": 0.2505, "step": 10360, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.46405228758169936, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49032258064516127, "success_rate.epoch.env.logic": 0.576551724137931, "success_rate.epoch.env.math": 0.9782135076252724, "success_rate.epoch.env.sat": 0.1003584229390681, "success_rate.epoch.env.science": 0.958913813459268, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5971183267651994, "success_rate.epoch.global": 0.8553959033934576, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0006067961165048, "tokens_p.mean_in_band": 0.48388671875, "tokens_rate.above_band": 0.9914438502673797, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008556149732620321 }, { "epoch": 1.722905585106383, "grad_norm": 146.885037908392, "learning_rate": 1.6194281124039818e-07, "loss": 0.279, "step": 10365, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.46405228758169936, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4935897435897436, "success_rate.epoch.env.logic": 0.576551724137931, "success_rate.epoch.env.math": 0.9782608695652174, "success_rate.epoch.env.sat": 0.1003584229390681, "success_rate.epoch.env.science": 0.9589428975932044, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5974222912214233, "success_rate.epoch.global": 0.8555284056200366, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9996926229508196, "tokens_p.mean_in_band": 0.6299252717391305, "tokens_rate.above_band": 0.9875876956287102, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0124123043712898 }, { "epoch": 1.7237367021276597, "grad_norm": 63.79654858497648, "learning_rate": 1.6193137206159332e-07, "loss": 0.2423, "step": 10370, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4675324675324675, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4935897435897436, "success_rate.epoch.env.logic": 0.576551724137931, "success_rate.epoch.env.math": 0.9783315276273022, "success_rate.epoch.env.sat": 0.1, "success_rate.epoch.env.science": 0.9589428975932044, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5977125107735857, "success_rate.epoch.global": 0.8554860369296505, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9898549910071942, "tokens_p.mean_below_band": 7.338821887969971e-07, "tokens_p.mean_in_band": 0.5164233576642335, "tokens_rate.above_band": 0.8017303532804614, "tokens_rate.below_band": 0.0007209805335255948, "tokens_rate.in_band": 0.19754866618601297 }, { "epoch": 1.7245678191489362, "grad_norm": 173.8553614973995, "learning_rate": 1.6191996495017648e-07, "loss": 0.2652, "step": 10375, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4675324675324675, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4935897435897436, "success_rate.epoch.env.logic": 0.576551724137931, "success_rate.epoch.env.math": 0.9783549783549783, "success_rate.epoch.env.sat": 0.099644128113879, "success_rate.epoch.env.science": 0.9590009425070688, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5976875674786238, "success_rate.epoch.global": 0.8555098308184728, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9904371584699454, "tokens_p.mean_in_band": 0.6208333333333333, "tokens_rate.above_band": 0.9242424242424242, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07575757575757576 }, { "epoch": 1.7253989361702127, "grad_norm": 45.36719262322075, "learning_rate": 1.6190858992639246e-07, "loss": 0.3356, "step": 10380, "success_rate.epoch.env.abd": 0.6046511627906976, "success_rate.epoch.env.agentgym:alfworld": 0.4675324675324675, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49044585987261147, "success_rate.epoch.env.logic": 0.5757575757575758, "success_rate.epoch.env.math": 0.9783549783549783, "success_rate.epoch.env.sat": 0.099644128113879, "success_rate.epoch.env.science": 0.9590491880442457, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5973339505185956, "success_rate.epoch.global": 0.8553593179049939, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9920753588516746, "tokens_p.mean_in_band": 0.6927816901408451, "tokens_rate.above_band": 0.8982808022922636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1017191977077364 }, { "epoch": 1.7262300531914894, "grad_norm": 57.86538853952683, "learning_rate": 1.6189724701042907e-07, "loss": 0.2898, "step": 10385, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4675324675324675, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4936708860759494, "success_rate.epoch.env.logic": 0.5757575757575758, "success_rate.epoch.env.math": 0.978401727861771, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9590780809031044, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5984187251951973, "success_rate.epoch.global": 0.8553832116788321, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949825479930192, "tokens_p.mean_in_band": 0.7357536764705882, "tokens_rate.above_band": 0.9711864406779661, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0288135593220339 }, { "epoch": 1.727061170212766, "grad_norm": 58.51387472457898, "learning_rate": 1.618859362224173e-07, "loss": 0.2234, "step": 10390, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4675324675324675, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49056603773584906, "success_rate.epoch.env.logic": 0.5757575757575758, "success_rate.epoch.env.math": 0.978401727861771, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9590973201692524, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5981382152793835, "success_rate.epoch.global": 0.8552971576227391, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9902991107518189, "tokens_p.mean_in_band": 0.6035067873303167, "tokens_rate.above_band": 0.8484224965706447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.15157750342935528 }, { "epoch": 1.7278922872340425, "grad_norm": 498.98309869079554, "learning_rate": 1.6187465758243098e-07, "loss": 0.1654, "step": 10395, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4675324675324675, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49056603773584906, "success_rate.epoch.env.logic": 0.5757575757575758, "success_rate.epoch.env.math": 0.9784250269687162, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9591357444809769, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5981438264992626, "success_rate.epoch.global": 0.8554070473876063, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9881198347107438, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.952755905511811, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.047244094488188976 }, { "epoch": 1.728723404255319, "grad_norm": 147.55376008926604, "learning_rate": 1.6186341111048696e-07, "loss": 0.3727, "step": 10400, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4675324675324675, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49056603773584906, "success_rate.epoch.env.logic": 0.5749656121045392, "success_rate.epoch.env.math": 0.978448275862069, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9591453392815215, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5980748155938863, "success_rate.epoch.global": 0.8553210869895248, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9936509986225895, "tokens_p.mean_in_band": 0.681586869266055, "tokens_rate.above_band": 0.9301729660474055, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06982703395259449 }, { "epoch": 1.7295545212765957, "grad_norm": 104.33171998318714, "learning_rate": 1.6185219682654495e-07, "loss": 0.1567, "step": 10405, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4675324675324675, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49056603773584906, "success_rate.epoch.env.logic": 0.5755494505494505, "success_rate.epoch.env.math": 0.978448275862069, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9591740966682308, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5981305061240336, "success_rate.epoch.global": 0.8554088909118495, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.999304008908686, "tokens_p.mean_in_band": 0.7515625, "tokens_rate.above_band": 0.9889867841409692, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011013215859030838 }, { "epoch": 1.7303856382978724, "grad_norm": 19.196243021943218, "learning_rate": 1.6184101475050756e-07, "loss": 0.2917, "step": 10410, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4675324675324675, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49056603773584906, "success_rate.epoch.env.logic": 0.5755494505494505, "success_rate.epoch.env.math": 0.9784714747039828, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9592123769338959, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5981360951338135, "success_rate.epoch.global": 0.8555184960582171, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9931437644341802, "tokens_p.mean_in_band": 0.74609375, "tokens_rate.above_band": 0.9665178571428571, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033482142857142856 }, { "epoch": 1.7312167553191489, "grad_norm": 45.34125122882448, "learning_rate": 1.6182986490222024e-07, "loss": 0.302, "step": 10415, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49056603773584906, "success_rate.epoch.env.logic": 0.5747599451303155, "success_rate.epoch.env.math": 0.978494623655914, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9592314901593252, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5977939513390876, "success_rate.epoch.global": 0.8553249507650356, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.3666666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9997020262216925, "tokens_p.mean_below_band": 3.259629011154175e-07, "tokens_p.mean_in_band": 0.6005284926470589, "tokens_rate.above_band": 0.9789964994165694, "tokens_rate.below_band": 0.0011668611435239206, "tokens_rate.in_band": 0.019836639439906652 }, { "epoch": 1.7320478723404256, "grad_norm": 79.44963280425318, "learning_rate": 1.6181874730147124e-07, "loss": 0.3958, "step": 10420, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5747599451303155, "success_rate.epoch.env.math": 0.978494623655914, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9592601264340904, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5975178239335346, "success_rate.epoch.global": 0.8552611657834973, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9979173750425026, "tokens_p.mean_in_band": 0.680908203125, "tokens_rate.above_band": 0.9387168847749761, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.061283115225023936 }, { "epoch": 1.7328789893617023, "grad_norm": 181.90025568672738, "learning_rate": 1.6180766196799153e-07, "loss": 0.2172, "step": 10425, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5747599451303155, "success_rate.epoch.env.math": 0.978494623655914, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9593172784662146, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5975230195728186, "success_rate.epoch.global": 0.855392527605506, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9909957627118644, "tokens_p.mean_in_band": 0.880859375, "tokens_rate.above_band": 0.9672131147540983, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03278688524590164 }, { "epoch": 1.7337101063829787, "grad_norm": 64.85418381299172, "learning_rate": 1.6179660892145494e-07, "loss": 0.1828, "step": 10430, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5747599451303155, "success_rate.epoch.env.math": 0.978494623655914, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9593647828117702, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5975273381496873, "success_rate.epoch.global": 0.8555018137847642, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952496859296482, "tokens_p.mean_in_band": 0.5434027777777778, "tokens_rate.above_band": 0.9778869778869779, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022113022113022112 }, { "epoch": 1.7345412234042552, "grad_norm": 36.679096149326476, "learning_rate": 1.617855881814779e-07, "loss": 0.2724, "step": 10435, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5753424657534246, "success_rate.epoch.env.math": 0.9785177228786252, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9593932322053675, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5975849808078162, "success_rate.epoch.global": 0.855610934904093, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967447916666666, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.735372340425532, "grad_norm": 59.830329808567896, "learning_rate": 1.6177459976761948e-07, "loss": 0.2245, "step": 10440, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4875, "success_rate.epoch.env.logic": 0.5759233926128591, "success_rate.epoch.env.math": 0.9785637727759914, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9594216417910447, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.597644561384405, "success_rate.epoch.global": 0.8557416628942206, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9916125541125541, "tokens_p.mean_in_band": 0.86171875, "tokens_rate.above_band": 0.9788135593220338, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0211864406779661 }, { "epoch": 1.7362034574468086, "grad_norm": 144.78075304979205, "learning_rate": 1.617636436993815e-07, "loss": 0.2663, "step": 10445, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4906832298136646, "success_rate.epoch.env.logic": 0.5770804911323328, "success_rate.epoch.env.math": 0.9785637727759914, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.95947834187238, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5980442912402663, "success_rate.epoch.global": 0.8559373116335142, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9901256732495511, "tokens_p.mean_in_band": 0.87734375, "tokens_rate.above_band": 0.982363315696649, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01763668430335097 }, { "epoch": 1.737034574468085, "grad_norm": 49.76619192841472, "learning_rate": 1.617527199962082e-07, "loss": 0.3506, "step": 10450, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4906832298136646, "success_rate.epoch.env.logic": 0.5770804911323328, "success_rate.epoch.env.math": 0.9785637727759914, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9595160539785947, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5980477196135585, "success_rate.epoch.global": 0.8560240963855422, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9980459835691465, "tokens_p.mean_in_band": 0.6064453125, "tokens_rate.above_band": 0.9856050382366172, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014394961763382817 }, { "epoch": 1.7378656914893615, "grad_norm": 45.83232461524707, "learning_rate": 1.6174182867748652e-07, "loss": 0.245, "step": 10455, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4906832298136646, "success_rate.epoch.env.logic": 0.5770804911323328, "success_rate.epoch.env.math": 0.9785867237687366, "success_rate.epoch.env.sat": 0.09929078014184398, "success_rate.epoch.env.science": 0.9595630955147572, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.598054082570732, "success_rate.epoch.global": 0.8561540776406861, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.000457579672696, "tokens_p.mean_in_band": 0.5912298387096774, "tokens_rate.above_band": 0.9739932885906041, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026006711409395974 }, { "epoch": 1.7386968085106385, "grad_norm": 14.621711006868432, "learning_rate": 1.617309697625459e-07, "loss": 0.1439, "step": 10460, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4906832298136646, "success_rate.epoch.env.logic": 0.5776566757493188, "success_rate.epoch.env.math": 0.9786324786324786, "success_rate.epoch.env.sat": 0.0989399293286219, "success_rate.epoch.env.science": 0.9595912679981421, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5980812881299039, "success_rate.epoch.global": 0.8561551179918834, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956121575342466, "tokens_p.mean_in_band": 0.7606336805555556, "tokens_rate.above_band": 0.9419354838709677, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05806451612903226 }, { "epoch": 1.739527925531915, "grad_norm": 62.87382226545363, "learning_rate": 1.6172014327065827e-07, "loss": 0.3308, "step": 10465, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4906832298136646, "success_rate.epoch.env.logic": 0.5776566757493188, "success_rate.epoch.env.math": 0.9786780383795309, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.9596287703016241, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.59805716831333, "success_rate.epoch.global": 0.8561561561561561, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995752427184466, "tokens_p.mean_in_band": 0.6630859375, "tokens_rate.above_band": 0.865546218487395, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13445378151260504 }, { "epoch": 1.7403590425531914, "grad_norm": 34.247594341288874, "learning_rate": 1.61709349221038e-07, "loss": 0.2736, "step": 10470, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4906832298136646, "success_rate.epoch.env.logic": 0.5760869565217391, "success_rate.epoch.env.math": 0.9786780383795309, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.9596474953617811, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5979161688435642, "success_rate.epoch.global": 0.8559423769507803, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9963070497147515, "tokens_p.mean_in_band": 0.5689019097222222, "tokens_rate.above_band": 0.9714964370546318, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028503562945368172 }, { "epoch": 1.741190159574468, "grad_norm": 269.5388240565678, "learning_rate": 1.6169858763284178e-07, "loss": 0.1886, "step": 10475, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49079754601226994, "success_rate.epoch.env.logic": 0.5760869565217391, "success_rate.epoch.env.math": 0.9786780383795309, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.9596848934198332, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5979299610487149, "success_rate.epoch.global": 0.8559220389805098, "success_rate.window.env.ded": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9930200180614088, "tokens_p.mean_in_band": 0.6934956395348837, "tokens_rate.above_band": 0.987221396731055, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.012778603268945022 }, { "epoch": 1.7420212765957448, "grad_norm": 229.2316977777791, "learning_rate": 1.616878585251689e-07, "loss": 0.2363, "step": 10480, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49079754601226994, "success_rate.epoch.env.logic": 0.5772357723577236, "success_rate.epoch.env.math": 0.9786780383795309, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.9597128965038203, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5980369445868942, "success_rate.epoch.global": 0.8560299625468165, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.996871362048894, "tokens_p.mean_in_band": 0.7535807291666666, "tokens_rate.above_band": 0.9930635838150289, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006936416184971098 }, { "epoch": 1.7428523936170213, "grad_norm": 284.467066088806, "learning_rate": 1.6167716191706087e-07, "loss": 0.2567, "step": 10485, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49079754601226994, "success_rate.epoch.env.logic": 0.5772357723577236, "success_rate.epoch.env.math": 0.9786780383795309, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.9597501734906315, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.598040333403877, "success_rate.epoch.global": 0.8561161850576433, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963804713804714, "tokens_p.mean_in_band": 0.6646581338652482, "tokens_rate.above_band": 0.9132841328413284, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08671586715867159 }, { "epoch": 1.7436835106382977, "grad_norm": 139.3418643651137, "learning_rate": 1.6166649782750152e-07, "loss": 0.1964, "step": 10490, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4645161290322581, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49390243902439024, "success_rate.epoch.env.logic": 0.577807848443843, "success_rate.epoch.env.math": 0.9786780383795309, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.9597780859916782, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.598377140821994, "success_rate.epoch.global": 0.8562238180730102, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.992269392033543, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.997907949790795, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0020920502092050207 }, { "epoch": 1.7445146276595744, "grad_norm": 113.38370202310955, "learning_rate": 1.6165586627541704e-07, "loss": 0.3081, "step": 10495, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46794871794871795, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49390243902439024, "success_rate.epoch.env.logic": 0.5783783783783784, "success_rate.epoch.env.math": 0.9787007454739084, "success_rate.epoch.env.sat": 0.09859154929577464, "success_rate.epoch.env.science": 0.9598152424942263, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.598746502862714, "success_rate.epoch.global": 0.8563742340457331, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.993455497382199, "tokens_p.mean_in_band": 0.7376302083333334, "tokens_rate.above_band": 0.9408866995073891, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.059113300492610835 }, { "epoch": 1.7453457446808511, "grad_norm": 161.48573772761986, "learning_rate": 1.6164526727967583e-07, "loss": 0.2275, "step": 10500, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46794871794871795, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49390243902439024, "success_rate.epoch.env.logic": 0.5789473684210527, "success_rate.epoch.env.math": 0.9787234042553191, "success_rate.epoch.env.sat": 0.0979020979020979, "success_rate.epoch.env.science": 0.9598523304107061, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5987409833487948, "success_rate.epoch.global": 0.8562472010747872, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.992816091954023, "tokens_p.mean_in_band": 0.5581896551724138, "tokens_rate.above_band": 0.9, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1 }, { "epoch": 1.7461768617021276, "grad_norm": 44.95849973332664, "learning_rate": 1.6163470085908848e-07, "loss": 0.304, "step": 10505, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49390243902439024, "success_rate.epoch.env.logic": 0.5789473684210527, "success_rate.epoch.env.math": 0.9787460148777896, "success_rate.epoch.env.sat": 0.0975609756097561, "success_rate.epoch.env.science": 0.9598893499308437, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5984444326913212, "success_rate.epoch.global": 0.8560990158067402, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9960548172757475, "tokens_p.mean_in_band": 0.6958705357142857, "tokens_rate.above_band": 0.9148936170212766, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0851063829787234 }, { "epoch": 1.7470079787234043, "grad_norm": 92.58629026333666, "learning_rate": 1.616241670324078e-07, "loss": 0.2984, "step": 10510, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5789473684210527, "success_rate.epoch.env.math": 0.9787685774946921, "success_rate.epoch.env.sat": 0.0975609756097561, "success_rate.epoch.env.science": 0.9599263012436665, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5987286846799605, "success_rate.epoch.global": 0.856227651966627, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957957342083675, "tokens_p.mean_in_band": 0.7117745535714286, "tokens_rate.above_band": 0.9886455798864558, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0113544201135442 }, { "epoch": 1.747839095744681, "grad_norm": 281.1879377554055, "learning_rate": 1.6161366581832878e-07, "loss": 0.2461, "step": 10515, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5789473684210527, "success_rate.epoch.env.math": 0.9787685774946921, "success_rate.epoch.env.sat": 0.0975609756097561, "success_rate.epoch.env.science": 0.9599723947550034, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5987328749991729, "success_rate.epoch.global": 0.85633467321721, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9857324399260629, "tokens_p.mean_below_band": 1.0058283805847168e-06, "tokens_p.mean_in_band": 0.7421223958333333, "tokens_rate.above_band": 0.8172205438066465, "tokens_rate.below_band": 0.0015105740181268882, "tokens_rate.in_band": 0.18126888217522658 }, { "epoch": 1.7486702127659575, "grad_norm": 63.34607721558829, "learning_rate": 1.6160319723548847e-07, "loss": 0.186, "step": 10520, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5795148247978437, "success_rate.epoch.env.math": 0.9787685774946921, "success_rate.epoch.env.sat": 0.0975609756097561, "success_rate.epoch.env.science": 0.9599908024833295, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5987861353723655, "success_rate.epoch.global": 0.8563988095238095, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9943467336683417, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9851485148514851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01485148514851485 }, { "epoch": 1.749501329787234, "grad_norm": 22.65245025462587, "learning_rate": 1.6159276130246606e-07, "loss": 0.1918, "step": 10525, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5795148247978437, "success_rate.epoch.env.math": 0.9787910922587487, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.9600275671950379, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5993592746668847, "success_rate.epoch.global": 0.856548238442099, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9890684410646388, "tokens_p.mean_in_band": 0.7847222222222222, "tokens_rate.above_band": 0.9669117647058824, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03308823529411765 }, { "epoch": 1.7503324468085106, "grad_norm": 51.56959633581304, "learning_rate": 1.6158235803778275e-07, "loss": 0.2721, "step": 10530, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5787348586810229, "success_rate.epoch.env.math": 0.9787910922587487, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.9600734281780633, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5992925378365396, "success_rate.epoch.global": 0.8565275508688549, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994343471810089, "tokens_p.mean_below_band": 3.841705620288849e-09, "tokens_p.mean_in_band": 0.6805752840909091, "tokens_rate.above_band": 0.9670014347202296, "tokens_rate.below_band": 0.0014347202295552368, "tokens_rate.in_band": 0.03156384505021521 }, { "epoch": 1.7511635638297873, "grad_norm": 126.4828648171792, "learning_rate": 1.6157198745990182e-07, "loss": 0.1748, "step": 10535, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5793010752688172, "success_rate.epoch.env.math": 0.9787910922587487, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.9601191840476736, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5993481716963036, "success_rate.epoch.global": 0.856655290102389, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9924089068825911, "tokens_p.mean_below_band": 1.126900315284729e-07, "tokens_p.mean_in_band": 0.8645833333333334, "tokens_rate.above_band": 0.9840637450199203, "tokens_rate.below_band": 0.00398406374501992, "tokens_rate.in_band": 0.01195219123505976 }, { "epoch": 1.7519946808510638, "grad_norm": 40.72336368621944, "learning_rate": 1.615616495872285e-07, "loss": 0.3656, "step": 10540, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5777479892761395, "success_rate.epoch.env.math": 0.9788135593220338, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.9601465872652314, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5992115157225004, "success_rate.epoch.global": 0.8564862861378799, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9935196492007105, "tokens_p.mean_in_band": 0.5662370017331022, "tokens_rate.above_band": 0.8864396772288919, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11356032277110804 }, { "epoch": 1.7528257978723403, "grad_norm": 40.38528191942742, "learning_rate": 1.6155134443810997e-07, "loss": 0.1394, "step": 10545, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5777479892761395, "success_rate.epoch.env.math": 0.9788359788359788, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.960183066361556, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5992168701416158, "success_rate.epoch.global": 0.8565925925925926, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957344213649851, "tokens_p.mean_in_band": 0.7295673076923077, "tokens_rate.above_band": 0.9810771470160117, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018922852983988356 }, { "epoch": 1.753656914893617, "grad_norm": 99.92699471056089, "learning_rate": 1.6154107203083533e-07, "loss": 0.215, "step": 10550, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46496815286624205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5777479892761395, "success_rate.epoch.env.math": 0.9788806758183738, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.9602103818888634, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5992234167334071, "success_rate.epoch.global": 0.8566987416728349, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9906965648854962, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7544880319148937, "grad_norm": 15.806897027386105, "learning_rate": 1.615308323836356e-07, "loss": 0.1437, "step": 10555, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46835443037974683, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5777479892761395, "success_rate.epoch.env.math": 0.9789029535864979, "success_rate.epoch.env.sat": 0.10380622837370242, "success_rate.epoch.env.science": 0.9602467443454421, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5995365910732441, "success_rate.epoch.global": 0.856825913326431, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.994281045751634, "tokens_p.mean_in_band": 0.81640625, "tokens_rate.above_band": 0.996742671009772, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003257328990228013 }, { "epoch": 1.7553191489361701, "grad_norm": 17.631819211192283, "learning_rate": 1.6152062551468369e-07, "loss": 0.2076, "step": 10560, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46835443037974683, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5783132530120482, "success_rate.epoch.env.math": 0.9789029535864979, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9602467443454421, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5995554375481782, "success_rate.epoch.global": 0.856720390359308, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9929187192118226, "tokens_p.mean_in_band": 0.6519396551724138, "tokens_rate.above_band": 0.9545454545454546, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.045454545454545456 }, { "epoch": 1.7561502659574468, "grad_norm": 67.89578263728303, "learning_rate": 1.615104514420942e-07, "loss": 0.2159, "step": 10565, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46835443037974683, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5783132530120482, "success_rate.epoch.env.math": 0.9789473684210527, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9602649006622517, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5995611258346658, "success_rate.epoch.global": 0.856805083493424, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9933510638297872, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9929577464788732, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007042253521126761 }, { "epoch": 1.7569813829787235, "grad_norm": 272.199993757412, "learning_rate": 1.615003101839237e-07, "loss": 0.375, "step": 10570, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46835443037974683, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5788770053475936, "success_rate.epoch.env.math": 0.9789915966386554, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9602739726027397, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5996172215159055, "success_rate.epoch.global": 0.8568896765618077, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9977080908032596, "tokens_p.mean_in_band": 0.72509765625, "tokens_rate.above_band": 0.9817142857142858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018285714285714287 }, { "epoch": 1.7578125, "grad_norm": 4.328831016811341, "learning_rate": 1.614902017581704e-07, "loss": 0.2748, "step": 10575, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46835443037974683, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5794392523364486, "success_rate.epoch.env.math": 0.9789915966386554, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9603102189781022, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5996716300035616, "success_rate.epoch.global": 0.856995277449823, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986942896935933, "tokens_p.mean_in_band": 0.52734375, "tokens_rate.above_band": 0.9972222222222222, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002777777777777778 }, { "epoch": 1.7586436170212765, "grad_norm": 16.380425973142383, "learning_rate": 1.6148012618277427e-07, "loss": 0.1598, "step": 10580, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46835443037974683, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5794392523364486, "success_rate.epoch.env.math": 0.9789915966386554, "success_rate.epoch.env.sat": 0.10344827586206896, "success_rate.epoch.env.science": 0.9603463992707384, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.599674919121074, "success_rate.epoch.global": 0.8570796460176991, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921511627906977, "tokens_p.mean_in_band": 0.439453125, "tokens_rate.above_band": 0.9953703703703703, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004629629629629629 }, { "epoch": 1.7594747340425532, "grad_norm": 87.07863047658616, "learning_rate": 1.6147008347561698e-07, "loss": 0.1633, "step": 10585, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.46540880503144655, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49696969696969695, "success_rate.epoch.env.logic": 0.5786666666666667, "success_rate.epoch.env.math": 0.9789915966386554, "success_rate.epoch.env.sat": 0.10309278350515463, "success_rate.epoch.env.science": 0.960145752675928, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5992863418510915, "success_rate.epoch.global": 0.8566165635131152, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.41666666666666663, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9990371734762223, "tokens_p.mean_below_band": 4.274625098332763e-11, "tokens_p.mean_in_band": 0.6464251893939394, "tokens_rate.above_band": 0.9777341191879503, "tokens_rate.below_band": 0.0006548788474132286, "tokens_rate.in_band": 0.021611001964636542 }, { "epoch": 1.7603058510638299, "grad_norm": 78.82315458697826, "learning_rate": 1.6146007365452194e-07, "loss": 0.3379, "step": 10590, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4939759036144578, "success_rate.epoch.env.logic": 0.5792276964047937, "success_rate.epoch.env.math": 0.9790356394129979, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.9601548269581056, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5987416003421007, "success_rate.epoch.global": 0.8561966440977333, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9946760193782802, "tokens_p.mean_in_band": 0.5518380979073244, "tokens_rate.above_band": 0.8810243642183888, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11897563578161124 }, { "epoch": 1.7611369680851063, "grad_norm": 76.83334921370717, "learning_rate": 1.6145009673725407e-07, "loss": 0.1736, "step": 10595, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4939759036144578, "success_rate.epoch.env.logic": 0.5792276964047937, "success_rate.epoch.env.math": 0.9790575916230366, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.9601910828025477, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.598746891983417, "success_rate.epoch.global": 0.856302397411384, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9897388059701493, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9852941176470589, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014705882352941176 }, { "epoch": 1.761968085106383, "grad_norm": 315.4143076224078, "learning_rate": 1.6144015274151998e-07, "loss": 0.3128, "step": 10600, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4939759036144578, "success_rate.epoch.env.logic": 0.5797872340425532, "success_rate.epoch.env.math": 0.9790575916230366, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.9602363099295614, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5988018705983964, "success_rate.epoch.global": 0.8564290962527553, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9954954954954955, "tokens_p.mean_in_band": 0.6953125, "tokens_rate.above_band": 0.9955156950672646, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004484304932735426 }, { "epoch": 1.7627992021276597, "grad_norm": 73.75337475244935, "learning_rate": 1.614302416849678e-07, "loss": 0.2492, "step": 10605, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4939759036144578, "success_rate.epoch.env.logic": 0.5797872340425532, "success_rate.epoch.env.math": 0.9791231732776617, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.9602453430258974, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5988086537575747, "success_rate.epoch.global": 0.8565134380966368, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9910287081339713, "tokens_p.mean_in_band": 0.5538793103448276, "tokens_rate.above_band": 0.8781512605042017, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12184873949579832 }, { "epoch": 1.7636303191489362, "grad_norm": 0.7034346217662198, "learning_rate": 1.6142036358518724e-07, "loss": 0.1612, "step": 10610, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4939759036144578, "success_rate.epoch.env.logic": 0.5803452855245684, "success_rate.epoch.env.math": 0.9791666666666666, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.960272417707151, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5988658009895996, "success_rate.epoch.global": 0.856639765223771, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9883191747572816, "tokens_p.mean_in_band": 0.8802083333333334, "tokens_rate.above_band": 0.9856459330143541, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014354066985645933 }, { "epoch": 1.7644614361702127, "grad_norm": 37.57350612689455, "learning_rate": 1.6141051845970954e-07, "loss": 0.2963, "step": 10615, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49101796407185627, "success_rate.epoch.env.logic": 0.5809018567639257, "success_rate.epoch.env.math": 0.9791666666666666, "success_rate.epoch.env.sat": 0.10238907849829351, "success_rate.epoch.env.science": 0.9603084599682468, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5986507713494044, "success_rate.epoch.global": 0.8566192640375312, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9906858048349056, "tokens_p.mean_below_band": 4.991888999938965e-07, "tokens_p.mean_in_band": 0.5673249551166966, "tokens_rate.above_band": 0.8585168311819793, "tokens_rate.below_band": 0.0005062009617818274, "tokens_rate.in_band": 0.14097696785623892 }, { "epoch": 1.7652925531914894, "grad_norm": 12.558511457793411, "learning_rate": 1.6140070632600733e-07, "loss": 0.1631, "step": 10620, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49101796407185627, "success_rate.epoch.env.logic": 0.5809018567639257, "success_rate.epoch.env.math": 0.9791883454734651, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9603264565858082, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5989319326036603, "success_rate.epoch.global": 0.8567032967032967, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917561349693251, "tokens_p.mean_in_band": 0.7421875, "tokens_rate.above_band": 0.9760479041916168, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023952095808383235 }, { "epoch": 1.766123670212766, "grad_norm": 83.80576165897881, "learning_rate": 1.6139092720149478e-07, "loss": 0.1593, "step": 10625, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49101796407185627, "success_rate.epoch.env.logic": 0.5809018567639257, "success_rate.epoch.env.math": 0.9791883454734651, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9603624009060022, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5989352002691324, "success_rate.epoch.global": 0.8567872309269293, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9938479965156795, "tokens_p.mean_in_band": 0.7503142959770115, "tokens_rate.above_band": 0.9295546558704454, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07044534412955465 }, { "epoch": 1.7669547872340425, "grad_norm": 228.61911920987004, "learning_rate": 1.6138118110352744e-07, "loss": 0.1194, "step": 10630, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5809018567639257, "success_rate.epoch.env.math": 0.9792099792099792, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9604072398190046, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.599216666416885, "success_rate.epoch.global": 0.8569338794616735, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9908216783216783, "tokens_p.mean_in_band": 0.78515625, "tokens_rate.above_band": 0.9965156794425087, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003484320557491289 }, { "epoch": 1.767785904255319, "grad_norm": 32.3489141796363, "learning_rate": 1.6137146804940222e-07, "loss": 0.4175, "step": 10635, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5809018567639257, "success_rate.epoch.env.math": 0.9792099792099792, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9602169981916817, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.599199371723492, "success_rate.epoch.global": 0.8568713450292398, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.985176282051282, "tokens_p.mean_below_band": 4.6798959374427795e-08, "tokens_rate.above_band": 0.9936305732484076, "tokens_rate.below_band": 0.006369426751592357, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7686170212765957, "grad_norm": 142.57846639845263, "learning_rate": 1.6136178805635745e-07, "loss": 0.1124, "step": 10640, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5814569536423841, "success_rate.epoch.env.math": 0.9792746113989638, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9602439575333183, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5992581615788629, "success_rate.epoch.global": 0.857017671973127, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9986275337837838, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9966329966329966, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003367003367003367 }, { "epoch": 1.7694481382978724, "grad_norm": 32.15966394505021, "learning_rate": 1.6135214114157273e-07, "loss": 0.1893, "step": 10645, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5814569536423841, "success_rate.epoch.env.math": 0.979296066252588, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9603067207938656, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5992658177710604, "success_rate.epoch.global": 0.8571845368344274, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944073783359497, "tokens_p.mean_in_band": 0.6701388888888888, "tokens_rate.above_band": 0.9860681114551083, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01393188854489164 }, { "epoch": 1.7702792553191489, "grad_norm": 61.83727802351752, "learning_rate": 1.6134252732216892e-07, "loss": 0.2424, "step": 10650, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5806878306878307, "success_rate.epoch.env.math": 0.9793174767321613, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.960315670800451, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5991986575466609, "success_rate.epoch.global": 0.8571011956838729, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969468390804598, "tokens_p.mean_in_band": 0.6474609375, "tokens_rate.above_band": 0.9886363636363636, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011363636363636364 }, { "epoch": 1.7711103723404256, "grad_norm": 45.13221231646338, "learning_rate": 1.6133294661520826e-07, "loss": 0.286, "step": 10655, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5812417437252312, "success_rate.epoch.env.math": 0.9793601651186791, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9603335587108407, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5992545202134161, "success_rate.epoch.global": 0.8572053038030016, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9957306338028169, "tokens_p.mean_in_band": 0.5216619318181818, "tokens_rate.above_band": 0.955585464333782, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04441453566621804 }, { "epoch": 1.7719414893617023, "grad_norm": 60.32894950868391, "learning_rate": 1.6132339903769417e-07, "loss": 0.2419, "step": 10660, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5812417437252312, "success_rate.epoch.env.math": 0.979381443298969, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9603692861968025, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5992597025467117, "success_rate.epoch.global": 0.8573092603377985, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947337042925278, "tokens_p.mean_in_band": 0.7075892857142857, "tokens_rate.above_band": 0.9889937106918238, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0110062893081761 }, { "epoch": 1.7727726063829787, "grad_norm": 148.7396810129139, "learning_rate": 1.6131388460657119e-07, "loss": 0.1919, "step": 10665, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5817941952506597, "success_rate.epoch.env.math": 0.9794026776519053, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9603782080144079, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5993126668827089, "success_rate.epoch.global": 0.8573715616358608, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.7142857142857143, "success_rate.window.env_macro_mean": 0.9047619047619048, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965377697841726, "tokens_p.mean_below_band": 1.126900315284729e-07, "tokens_p.mean_in_band": 0.6722530241935484, "tokens_rate.above_band": 0.9559834938101788, "tokens_rate.below_band": 0.001375515818431912, "tokens_rate.in_band": 0.04264099037138927 }, { "epoch": 1.7736037234042552, "grad_norm": 62.75924004314427, "learning_rate": 1.6130440333872517e-07, "loss": 0.2203, "step": 10670, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5823451910408433, "success_rate.epoch.env.math": 0.9794238683127572, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9604138551506972, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5993679244815567, "success_rate.epoch.global": 0.8574960011632979, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996875, "tokens_p.mean_in_band": 0.8525390625, "tokens_rate.above_band": 0.9939759036144579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006024096385542169 }, { "epoch": 1.774434840425532, "grad_norm": 26.635178446257964, "learning_rate": 1.6129495525098297e-07, "loss": 0.2278, "step": 10675, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5823451910408433, "success_rate.epoch.env.math": 0.9794450154162384, "success_rate.epoch.env.sat": 0.1054421768707483, "success_rate.epoch.env.science": 0.9604494382022472, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5993730817683777, "success_rate.epoch.global": 0.8575995350188899, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9935774858663119, "tokens_p.mean_in_band": 0.7221513605442177, "tokens_rate.above_band": 0.9533925174381738, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04660748256182625 }, { "epoch": 1.7752659574468086, "grad_norm": 34.247749283077695, "learning_rate": 1.612855403601127e-07, "loss": 0.2061, "step": 10680, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5834428383705651, "success_rate.epoch.env.math": 0.9794661190965093, "success_rate.epoch.env.sat": 0.10508474576271186, "success_rate.epoch.env.science": 0.9604849573417154, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5994455216812345, "success_rate.epoch.global": 0.8576197387518142, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954318936877077, "tokens_p.mean_in_band": 0.6220128676470589, "tokens_rate.above_band": 0.9465408805031447, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05345911949685535 }, { "epoch": 1.776097074468085, "grad_norm": 44.89425586317634, "learning_rate": 1.6127615868282347e-07, "loss": 0.249, "step": 10685, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.583989501312336, "success_rate.epoch.env.math": 0.9794661190965093, "success_rate.epoch.env.sat": 0.10508474576271186, "success_rate.epoch.env.science": 0.9602871886919453, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5994772393441437, "success_rate.epoch.global": 0.8575572961995939, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9375, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9901463963963963, "tokens_p.mean_below_band": 3.1650415621697903e-10, "tokens_p.mean_in_band": 0.7125, "tokens_rate.above_band": 0.9736842105263158, "tokens_rate.below_band": 0.0043859649122807015, "tokens_rate.in_band": 0.021929824561403508 }, { "epoch": 1.7769281914893615, "grad_norm": 66.04998950180257, "learning_rate": 1.6126681023576544e-07, "loss": 0.2835, "step": 10690, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.583989501312336, "success_rate.epoch.env.math": 0.9795291709314228, "success_rate.epoch.env.sat": 0.10508474576271186, "success_rate.epoch.env.science": 0.9603139013452915, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5994853997521672, "success_rate.epoch.global": 0.8576811594202899, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9989577343300747, "tokens_p.mean_in_band": 0.5974657012195121, "tokens_rate.above_band": 0.9769662921348314, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.023033707865168538 }, { "epoch": 1.7777593085106385, "grad_norm": 116.14493202306781, "learning_rate": 1.612574950355298e-07, "loss": 0.3844, "step": 10695, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.583989501312336, "success_rate.epoch.env.math": 0.9795709908069459, "success_rate.epoch.env.sat": 0.10508474576271186, "success_rate.epoch.env.science": 0.9603494623655914, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5994924343790603, "success_rate.epoch.global": 0.857804807413843, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9983219844357977, "tokens_p.mean_in_band": 0.44703125, "tokens_rate.above_band": 0.9809160305343512, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019083969465648856 }, { "epoch": 1.778590425531915, "grad_norm": 70.88412104735612, "learning_rate": 1.6124821309864876e-07, "loss": 0.2229, "step": 10700, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5845347313237221, "success_rate.epoch.env.math": 0.9795918367346939, "success_rate.epoch.env.sat": 0.10472972972972973, "success_rate.epoch.env.science": 0.9603849597135183, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5995148486748856, "success_rate.epoch.global": 0.8578041371329379, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9929577464788732, "tokens_p.mean_in_band": 0.72109375, "tokens_rate.above_band": 0.9342105263157895, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06578947368421052 }, { "epoch": 1.7794215425531914, "grad_norm": 541.3729652006937, "learning_rate": 1.6123896444159547e-07, "loss": 0.2195, "step": 10705, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5856209150326798, "success_rate.epoch.env.math": 0.9796126401630989, "success_rate.epoch.env.sat": 0.10472972972972973, "success_rate.epoch.env.science": 0.9603938241217275, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5996162897244831, "success_rate.epoch.global": 0.8578863669220761, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env.webshop": 0.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9945956256358087, "tokens_p.mean_in_band": 0.48607016509433965, "tokens_rate.above_band": 0.8607705779334501, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.13922942206654992 }, { "epoch": 1.780252659574468, "grad_norm": 84.96117863549608, "learning_rate": 1.612297490807841e-07, "loss": 0.2867, "step": 10710, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5856209150326798, "success_rate.epoch.env.math": 0.9797365754812564, "success_rate.epoch.env.sat": 0.10472972972972973, "success_rate.epoch.env.science": 0.9604026845637584, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5996283620663184, "success_rate.epoch.global": 0.8580300404390526, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9889423076923077, "tokens_p.mean_below_band": 1.2014061212539673e-07, "tokens_rate.above_band": 0.9923664122137404, "tokens_rate.below_band": 0.007633587786259542, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7810837765957448, "grad_norm": 36.91675718570145, "learning_rate": 1.6122056703256958e-07, "loss": 0.188, "step": 10715, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5856209150326798, "success_rate.epoch.env.math": 0.979757085020243, "success_rate.epoch.env.sat": 0.10472972972972973, "success_rate.epoch.env.science": 0.9604645968282332, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5996358549575421, "success_rate.epoch.global": 0.858193883439123, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917410714285714, "tokens_p.mean_in_band": 0.78125, "tokens_rate.above_band": 0.9929078014184397, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0070921985815602835 }, { "epoch": 1.7819148936170213, "grad_norm": 26.620477620727552, "learning_rate": 1.6121141831324785e-07, "loss": 0.3729, "step": 10720, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.49404761904761907, "success_rate.epoch.env.logic": 0.5861618798955613, "success_rate.epoch.env.math": 0.9797979797979798, "success_rate.epoch.env.sat": 0.10472972972972973, "success_rate.epoch.env.science": 0.9604822505023443, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5996903561679721, "success_rate.epoch.global": 0.8582960934121379, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.990301724137931, "tokens_p.mean_below_band": 4.544854164123535e-07, "tokens_p.mean_in_band": 0.15381205673758866, "tokens_rate.above_band": 0.3232484076433121, "tokens_rate.below_band": 0.0031847133757961785, "tokens_rate.in_band": 0.6735668789808917 }, { "epoch": 1.7827460106382977, "grad_norm": 64.2413703790169, "learning_rate": 1.6120230293905559e-07, "loss": 0.245, "step": 10725, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4970414201183432, "success_rate.epoch.env.logic": 0.5861618798955613, "success_rate.epoch.env.math": 0.9797979797979798, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.9604998884177639, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5999320664982919, "success_rate.epoch.global": 0.858233683907218, "success_rate.window.env.ded": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9885493094518775, "tokens_p.mean_in_band": 0.5636362128450106, "tokens_rate.above_band": 0.8310616929698709, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.16893830703012913 }, { "epoch": 1.7835771276595744, "grad_norm": 77.78724534866353, "learning_rate": 1.611932209261704e-07, "loss": 0.1425, "step": 10730, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.4970414201183432, "success_rate.epoch.env.logic": 0.5861618798955613, "success_rate.epoch.env.math": 0.9797979797979798, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.9605439144003567, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5999360688603458, "success_rate.epoch.global": 0.8583357327958537, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9896634615384615, "tokens_p.mean_in_band": 0.85546875, "tokens_rate.above_band": 0.9923664122137404, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007633587786259542 }, { "epoch": 1.7844082446808511, "grad_norm": 32.303789831973695, "learning_rate": 1.611841722907106e-07, "loss": 0.1799, "step": 10735, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5861618798955613, "success_rate.epoch.env.math": 0.9798183652875883, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.9605790645879733, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6002100793656987, "success_rate.epoch.global": 0.8584579976985041, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967468454258676, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7852393617021276, "grad_norm": 47.68586764099244, "learning_rate": 1.6117515704873534e-07, "loss": 0.1262, "step": 10740, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5861618798955613, "success_rate.epoch.env.math": 0.9798590130916415, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.9605966162065895, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.600215370222305, "success_rate.epoch.global": 0.8585393904542841, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.996484375, "tokens_p.mean_in_band": 0.853125, "tokens_rate.above_band": 0.9795918367346939, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02040816326530612 }, { "epoch": 1.7860704787234043, "grad_norm": 17.16289170253508, "learning_rate": 1.6116617521624444e-07, "loss": 0.2204, "step": 10745, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.4625, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5861618798955613, "success_rate.epoch.env.math": 0.9798792756539235, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.9606229143492769, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6002196030136658, "success_rate.epoch.global": 0.8586206896551725, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9870084269662921, "tokens_p.mean_in_band": 0.7760416666666666, "tokens_rate.above_band": 0.9834254143646409, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016574585635359115 }, { "epoch": 1.786901595744681, "grad_norm": 108.8161588564367, "learning_rate": 1.6115722680917845e-07, "loss": 0.2069, "step": 10750, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5, "success_rate.epoch.env.logic": 0.5861618798955613, "success_rate.epoch.env.math": 0.9798994974874372, "success_rate.epoch.env.sat": 0.10437710437710437, "success_rate.epoch.env.science": 0.9606579239831073, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.599963472164565, "success_rate.epoch.global": 0.8585989089865059, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949524940617577, "tokens_p.mean_in_band": 0.6748046875, "tokens_rate.above_band": 0.9952718676122931, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004728132387706856 }, { "epoch": 1.7877327127659575, "grad_norm": 155.21564865186264, "learning_rate": 1.611483118434186e-07, "loss": 0.1781, "step": 10755, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5861618798955613, "success_rate.epoch.env.math": 0.9799398194583752, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9606841403820524, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6004662206299046, "success_rate.epoch.global": 0.8586177229710352, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9966322055137845, "tokens_p.mean_in_band": 0.6277721774193549, "tokens_rate.above_band": 0.962605548854041, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03739445114595899 }, { "epoch": 1.788563829787234, "grad_norm": 17.373103121455166, "learning_rate": 1.6113943033478675e-07, "loss": 0.1922, "step": 10760, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5861618798955613, "success_rate.epoch.env.math": 0.9799599198396793, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9607190412782957, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6004712207460452, "success_rate.epoch.global": 0.8587190141854134, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.988, "tokens_p.mean_in_band": 0.8359375, "tokens_rate.above_band": 0.9920634920634921, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007936507936507936 }, { "epoch": 1.7893949468085106, "grad_norm": 59.74296882587284, "learning_rate": 1.6113058229904547e-07, "loss": 0.234, "step": 10765, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5867014341590613, "success_rate.epoch.env.math": 0.9799599198396793, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9605409000221681, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6005040764739882, "success_rate.epoch.global": 0.8586972083035075, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9920948616600791, "tokens_p.mean_below_band": 1.4637180356658064e-12, "tokens_p.mean_in_band": 0.559814453125, "tokens_rate.above_band": 0.937037037037037, "tokens_rate.below_band": 0.003703703703703704, "tokens_rate.in_band": 0.05925925925925926 }, { "epoch": 1.7902260638297873, "grad_norm": 47.34291211186402, "learning_rate": 1.6112176775189776e-07, "loss": 0.2077, "step": 10770, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5867014341590613, "success_rate.epoch.env.math": 0.97997997997998, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9605583868823399, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6005074898376676, "success_rate.epoch.global": 0.8587578706353749, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9858870967741935, "tokens_p.mean_in_band": 0.7919921875, "tokens_rate.above_band": 0.950920245398773, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049079754601226995 }, { "epoch": 1.7910571808510638, "grad_norm": 102.0355084399512, "learning_rate": 1.6111298670898735e-07, "loss": 0.186, "step": 10775, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.58777633289987, "success_rate.epoch.env.math": 0.97997997997998, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9605845881310895, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6006075898367182, "success_rate.epoch.global": 0.8588588588588588, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952651515151515, "tokens_p.mean_in_band": 0.6335720486111112, "tokens_rate.above_band": 0.9705882352941176, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029411764705882353 }, { "epoch": 1.7918882978723403, "grad_norm": 129.2380060196005, "learning_rate": 1.6110423918589843e-07, "loss": 0.2641, "step": 10780, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.58777633289987, "success_rate.epoch.env.math": 0.98, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9606368863334808, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6006141642205738, "success_rate.epoch.global": 0.859, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99, "tokens_p.mean_in_band": 0.8779296875, "tokens_rate.above_band": 0.974025974025974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025974025974025976 }, { "epoch": 1.792719414893617, "grad_norm": 1.930725702269604, "learning_rate": 1.6109552519815573e-07, "loss": 0.2438, "step": 10785, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.58777633289987, "success_rate.epoch.env.math": 0.98, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9606716747680071, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6006173268055307, "success_rate.epoch.global": 0.8590805254140491, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9943548387096774, "tokens_p.mean_in_band": 0.677734375, "tokens_rate.above_band": 0.9914712153518124, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008528784648187633 }, { "epoch": 1.7935505319148937, "grad_norm": 54.248138108864985, "learning_rate": 1.610868447612244e-07, "loss": 0.2339, "step": 10790, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.58777633289987, "success_rate.epoch.env.math": 0.98001998001998, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9607150739351137, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6006230885498114, "success_rate.epoch.global": 0.8592011412268188, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923780487804879, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.7943816489361701, "grad_norm": 37.33686049422359, "learning_rate": 1.6107819789051007e-07, "loss": 0.2256, "step": 10795, "success_rate.epoch.env.abd": 0.6136363636363636, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5875486381322957, "success_rate.epoch.env.math": 0.9800598205383848, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9607324067946172, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6006075866052962, "success_rate.epoch.global": 0.8591790193842646, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949270767279645, "tokens_p.mean_in_band": 0.7235243055555556, "tokens_rate.above_band": 0.9563371740448757, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.043662825955124315 }, { "epoch": 1.7952127659574468, "grad_norm": 56.47097629263309, "learning_rate": 1.6106958460135887e-07, "loss": 0.3491, "step": 10800, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5875486381322957, "success_rate.epoch.env.math": 0.9800995024875622, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9607670266696055, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5993746718950965, "success_rate.epoch.global": 0.8591769898903603, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.7083333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9892490671641792, "tokens_p.mean_below_band": 1.0477378964424133e-08, "tokens_p.mean_in_band": 0.17683390345876093, "tokens_rate.above_band": 0.337361530715005, "tokens_rate.below_band": 0.00025176233635448137, "tokens_rate.in_band": 0.6623867069486404 }, { "epoch": 1.7960438829787235, "grad_norm": 84.01805980590433, "learning_rate": 1.6106100490905728e-07, "loss": 0.2668, "step": 10805, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45962732919254656, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5873221216041398, "success_rate.epoch.env.math": 0.9801192842942346, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9607929515418502, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5993582346360747, "success_rate.epoch.global": 0.8591549295774648, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964862440191388, "tokens_p.mean_in_band": 0.5183823529411765, "tokens_rate.above_band": 0.9800703399765534, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01992966002344666 }, { "epoch": 1.796875, "grad_norm": 165.19382631484845, "learning_rate": 1.6105245882883211e-07, "loss": 0.2009, "step": 10810, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4567901234567901, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5873221216041398, "success_rate.epoch.env.math": 0.9801783944499505, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9608102157639806, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5991072499671738, "success_rate.epoch.global": 0.8591329068941009, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942010309278351, "tokens_p.mean_in_band": 0.4375, "tokens_rate.above_band": 0.9965753424657534, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003424657534246575 }, { "epoch": 1.7977061170212765, "grad_norm": 33.51566170180076, "learning_rate": 1.610439463758506e-07, "loss": 0.1779, "step": 10815, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4567901234567901, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5873221216041398, "success_rate.epoch.env.math": 0.9801980198019802, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9608446986361636, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.5991121688966476, "success_rate.epoch.global": 0.8592329545454546, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9919258373205742, "tokens_p.mean_below_band": 2.2065682614424986e-15, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.990521327014218, "tokens_rate.below_band": 0.004739336492890996, "tokens_rate.in_band": 0.004739336492890996 }, { "epoch": 1.7985372340425532, "grad_norm": 14.049088660533068, "learning_rate": 1.6103546756522025e-07, "loss": 0.187, "step": 10820, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4567901234567901, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.4666666666666667, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5873221216041398, "success_rate.epoch.env.math": 0.9802176063303659, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9608619173262972, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.599115514825604, "success_rate.epoch.global": 0.8592929149510152, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9935830700746697, "tokens_p.mean_in_band": 0.5491980651731161, "tokens_rate.above_band": 0.8764158066951926, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12358419330480745 }, { "epoch": 1.7993683510638299, "grad_norm": 94.23614605392928, "learning_rate": 1.6102702241198886e-07, "loss": 0.2219, "step": 10825, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4567901234567901, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5058139534883721, "success_rate.epoch.env.logic": 0.5873221216041398, "success_rate.epoch.env.math": 0.9802371541501976, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.960896309314587, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6021507214748272, "success_rate.epoch.global": 0.8594126826500212, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9929481672394044, "tokens_p.mean_below_band": 1.9190338207408786e-10, "tokens_p.mean_in_band": 0.5601619525547445, "tokens_rate.above_band": 0.863928748144483, "tokens_rate.below_band": 0.0004948045522018803, "tokens_rate.in_band": 0.1355764473033152 }, { "epoch": 1.8001994680851063, "grad_norm": 21.725552557153158, "learning_rate": 1.6101861093114452e-07, "loss": 0.2472, "step": 10830, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4539877300613497, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5086705202312138, "success_rate.epoch.env.logic": 0.58656330749354, "success_rate.epoch.env.math": 0.9802955665024631, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9609220636663007, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6020943147421709, "success_rate.epoch.global": 0.8593085860017002, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.998381388121547, "tokens_p.mean_in_band": 0.6746323529411765, "tokens_rate.above_band": 0.9883959044368601, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011604095563139932 }, { "epoch": 1.801030585106383, "grad_norm": 15.513609986208968, "learning_rate": 1.6101023313761557e-07, "loss": 0.3737, "step": 10835, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4539877300613497, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5086705202312138, "success_rate.epoch.env.logic": 0.58656330749354, "success_rate.epoch.env.math": 0.9803343166175025, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9609649122807018, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6021017328084836, "success_rate.epoch.global": 0.859447983014862, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9903085443037974, "tokens_p.mean_in_band": 0.85888671875, "tokens_rate.above_band": 0.9518072289156626, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04819277108433735 }, { "epoch": 1.8018617021276597, "grad_norm": 50.888313834838925, "learning_rate": 1.610018890462705e-07, "loss": 0.1737, "step": 10840, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4539877300613497, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5086705202312138, "success_rate.epoch.env.logic": 0.5886889460154242, "success_rate.epoch.env.math": 0.9803536345776032, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9609734707301031, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6022975068931551, "success_rate.epoch.global": 0.8595672464997879, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9994687017001546, "tokens_p.mean_in_band": 0.669375, "tokens_rate.above_band": 0.9810462471569371, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018953752843062926 }, { "epoch": 1.8026928191489362, "grad_norm": 117.19268834545286, "learning_rate": 1.6099357867191804e-07, "loss": 0.2364, "step": 10845, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4539877300613497, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5086705202312138, "success_rate.epoch.env.logic": 0.5895140664961637, "success_rate.epoch.env.math": 0.9803536345776032, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9609905763751918, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.602374072904594, "success_rate.epoch.global": 0.8595450049455984, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9934219380888291, "tokens_p.mean_in_band": 0.5498870481927711, "tokens_rate.above_band": 0.8817944457631142, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11820555423688583 }, { "epoch": 1.8035239361702127, "grad_norm": 54.87329252872173, "learning_rate": 1.609853020293071e-07, "loss": 0.2712, "step": 10850, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5086705202312138, "success_rate.epoch.env.logic": 0.5895140664961637, "success_rate.epoch.env.math": 0.9803536345776032, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9610076670317634, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6021239704309889, "success_rate.epoch.global": 0.8594632768361582, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.995777027027027, "tokens_p.mean_in_band": 0.5938720703125, "tokens_rate.above_band": 0.9327731092436975, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06722689075630252 }, { "epoch": 1.8043550531914894, "grad_norm": 59.80113712265346, "learning_rate": 1.6097705913312663e-07, "loss": 0.1564, "step": 10855, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5086705202312138, "success_rate.epoch.env.logic": 0.5895140664961637, "success_rate.epoch.env.math": 0.9803921568627451, "success_rate.epoch.env.sat": 0.1040268456375839, "success_rate.epoch.env.science": 0.9610332749562172, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.602129800450043, "success_rate.epoch.global": 0.8595624558927312, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9893617021276596, "tokens_p.mean_in_band": 0.6083333333333333, "tokens_rate.above_band": 0.9261083743842364, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07389162561576355 }, { "epoch": 1.805186170212766, "grad_norm": 82.76704370089003, "learning_rate": 1.6096884999800582e-07, "loss": 0.2698, "step": 10860, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5086705202312138, "success_rate.epoch.env.logic": 0.5895140664961637, "success_rate.epoch.env.math": 0.980411361410382, "success_rate.epoch.env.sat": 0.10367892976588629, "success_rate.epoch.env.science": 0.9610843900306077, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6021045644273458, "success_rate.epoch.global": 0.8595798674749753, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9934971098265896, "tokens_p.mean_in_band": 0.6182291666666667, "tokens_rate.above_band": 0.9202127659574468, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0797872340425532 }, { "epoch": 1.8060172872340425, "grad_norm": 73.75541550187319, "learning_rate": 1.6096067463851383e-07, "loss": 0.3175, "step": 10865, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5086705202312138, "success_rate.epoch.env.logic": 0.5895140664961637, "success_rate.epoch.env.math": 0.9804305283757339, "success_rate.epoch.env.sat": 0.10367892976588629, "success_rate.epoch.env.science": 0.9611098973126502, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6021086257225635, "success_rate.epoch.global": 0.8596590108496548, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.4166666666666667, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9910987074030553, "tokens_p.mean_below_band": 1.8533319234848022e-07, "tokens_p.mean_in_band": 0.4982205802603037, "tokens_rate.above_band": 0.8217458478176902, "tokens_rate.below_band": 0.00019312475859405175, "tokens_rate.in_band": 0.1780610274237157 }, { "epoch": 1.806848404255319, "grad_norm": 32.79037574406223, "learning_rate": 1.6095253306915995e-07, "loss": 0.3057, "step": 10870, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5086705202312138, "success_rate.epoch.env.logic": 0.5895140664961637, "success_rate.epoch.env.math": 0.98046875, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.9611438550534818, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6023867999285524, "success_rate.epoch.global": 0.8597972972972973, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9920180722891566, "tokens_p.mean_in_band": 0.8184523809523809, "tokens_rate.above_band": 0.9518348623853211, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0481651376146789 }, { "epoch": 1.8076795212765957, "grad_norm": 37.94694572669038, "learning_rate": 1.6094442530439343e-07, "loss": 0.157, "step": 10875, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5057471264367817, "success_rate.epoch.env.logic": 0.5887611749680716, "success_rate.epoch.env.math": 0.9804878048780488, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.9611523352247927, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6020550953582646, "success_rate.epoch.global": 0.8595948227349466, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9908613445378152, "tokens_p.mean_in_band": 0.49795502440106476, "tokens_rate.above_band": 0.7252559726962458, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.27474402730375425 }, { "epoch": 1.8085106382978724, "grad_norm": 92.51440367889073, "learning_rate": 1.6093635135860358e-07, "loss": 0.3007, "step": 10880, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5057471264367817, "success_rate.epoch.env.logic": 0.5880102040816326, "success_rate.epoch.env.math": 0.9804878048780488, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.9611523352247927, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6019868252776793, "success_rate.epoch.global": 0.8594739063159376, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.science": 0.6, "success_rate.window.env_macro_mean": 0.45, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 1.0003950140449438, "tokens_p.mean_below_band": 5.029141902923584e-08, "tokens_p.mean_in_band": 0.5941840277777778, "tokens_rate.above_band": 0.9861495844875346, "tokens_rate.below_band": 0.0013850415512465374, "tokens_rate.in_band": 0.012465373961218837 }, { "epoch": 1.8093417553191489, "grad_norm": 138.38660870721642, "learning_rate": 1.6092831124611973e-07, "loss": 0.357, "step": 10885, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5057471264367817, "success_rate.epoch.env.logic": 0.5880102040816326, "success_rate.epoch.env.math": 0.9804878048780488, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.9611862189271696, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6046637024057029, "success_rate.epoch.global": 0.8595726736013495, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9945507117437722, "tokens_p.mean_in_band": 0.837890625, "tokens_rate.above_band": 0.9929328621908127, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007067137809187279 }, { "epoch": 1.8101728723404256, "grad_norm": 32.394591454361134, "learning_rate": 1.6092030498121102e-07, "loss": 0.1672, "step": 10890, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5057471264367817, "success_rate.epoch.env.logic": 0.5880102040816326, "success_rate.epoch.env.math": 0.9804878048780488, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.9612453733942957, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6046690800845326, "success_rate.epoch.global": 0.859710714787249, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9908376963350786, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.8110039893617023, "grad_norm": 1.1973372800391116, "learning_rate": 1.609123325780867e-07, "loss": 0.2145, "step": 10895, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5057471264367817, "success_rate.epoch.env.logic": 0.5880102040816326, "success_rate.epoch.env.math": 0.9804878048780488, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.9612875163114397, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6046729112588184, "success_rate.epoch.global": 0.8598091495930396, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9946853741496599, "tokens_p.mean_in_band": 0.498046875, "tokens_rate.above_band": 0.9966101694915255, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003389830508474576 }, { "epoch": 1.8118351063829787, "grad_norm": 54.81361441209326, "learning_rate": 1.6090439405089574e-07, "loss": 0.2421, "step": 10900, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5057471264367817, "success_rate.epoch.env.logic": 0.5880102040816326, "success_rate.epoch.env.math": 0.9804878048780488, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.9613127580960661, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6046752059665116, "success_rate.epoch.global": 0.8598681441997476, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9933456873315364, "tokens_p.mean_in_band": 0.7465277777777778, "tokens_rate.above_band": 0.9763157894736842, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02368421052631579 }, { "epoch": 1.8126662234042552, "grad_norm": 1.7934461595338502, "learning_rate": 1.6089648941372717e-07, "loss": 0.1687, "step": 10905, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5057471264367817, "success_rate.epoch.env.logic": 0.5885350318471337, "success_rate.epoch.env.math": 0.9804878048780488, "success_rate.epoch.env.sat": 0.10666666666666667, "success_rate.epoch.env.science": 0.9613631430431951, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6047274980312962, "success_rate.epoch.global": 0.8600056053811659, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9933501237186285, "tokens_p.mean_in_band": 0.723043893129771, "tokens_rate.above_band": 0.9557432432432432, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.044256756756756756 }, { "epoch": 1.813497340425532, "grad_norm": 35.773572796412076, "learning_rate": 1.608886186806097e-07, "loss": 0.3519, "step": 10910, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5885350318471337, "success_rate.epoch.env.math": 0.9805068226120858, "success_rate.epoch.env.sat": 0.10631229235880399, "success_rate.epoch.env.science": 0.9614050303555941, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6049575737470435, "success_rate.epoch.global": 0.8600223964165733, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9912819253438114, "tokens_p.mean_in_band": 0.6825284090909091, "tokens_rate.above_band": 0.9686013320647003, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03139866793529972 }, { "epoch": 1.8143284574468086, "grad_norm": 32.06052618910017, "learning_rate": 1.6088078186551203e-07, "loss": 0.1763, "step": 10915, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5885350318471337, "success_rate.epoch.env.math": 0.9805258033106135, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.9614384748700173, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6052313606591303, "success_rate.epoch.global": 0.8601398601398601, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944036532951289, "tokens_p.mean_in_band": 0.6878551136363636, "tokens_rate.above_band": 0.9844851904090268, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015514809590973202 }, { "epoch": 1.815159574468085, "grad_norm": 47.28900936919914, "learning_rate": 1.6087297898234256e-07, "loss": 0.2481, "step": 10920, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.45121951219512196, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.589058524173028, "success_rate.epoch.env.math": 0.9805258033106135, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.9614551754006063, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6052804691006288, "success_rate.epoch.global": 0.8601985181042919, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9941339590443686, "tokens_p.mean_in_band": 0.80078125, "tokens_rate.above_band": 0.9865319865319865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013468013468013467 }, { "epoch": 1.8159906914893615, "grad_norm": 157.81453079796705, "learning_rate": 1.6086521004494947e-07, "loss": 0.1933, "step": 10925, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4457831325301205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5895806861499364, "success_rate.epoch.env.math": 0.980544747081712, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.9614801990911058, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6048377190800384, "success_rate.epoch.global": 0.8600558659217877, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9961163294797688, "tokens_p.mean_below_band": 2.455635694786906e-10, "tokens_p.mean_in_band": 0.703515625, "tokens_rate.above_band": 0.9914040114613181, "tokens_rate.below_band": 0.0014326647564469914, "tokens_rate.in_band": 0.0071633237822349575 }, { "epoch": 1.8168218085106385, "grad_norm": 0.0, "learning_rate": 1.6085747506712074e-07, "loss": 0.1365, "step": 10930, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4457831325301205, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5895806861499364, "success_rate.epoch.env.math": 0.9805825242718447, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.9612805537529743, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6048230037938566, "success_rate.epoch.global": 0.8599944165270799, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9955143540669856, "tokens_p.mean_below_band": 5.617039278149605e-09, "tokens_p.mean_in_band": 0.8411458333333334, "tokens_rate.above_band": 0.9812206572769953, "tokens_rate.below_band": 0.004694835680751174, "tokens_rate.in_band": 0.014084507042253521 }, { "epoch": 1.817652925531915, "grad_norm": 60.493922895028234, "learning_rate": 1.6084977406258405e-07, "loss": 0.4037, "step": 10935, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4491017964071856, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5893536121673004, "success_rate.epoch.env.math": 0.9806013579049466, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.9613140263669765, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6051088125339961, "success_rate.epoch.global": 0.8600111544896821, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9978181306306306, "tokens_p.mean_in_band": 0.4605263157894737, "tokens_rate.above_band": 0.9790518191841234, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.020948180815876516 }, { "epoch": 1.8184840425531914, "grad_norm": 33.669398942371046, "learning_rate": 1.6084210704500683e-07, "loss": 0.3771, "step": 10940, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4491017964071856, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5886075949367089, "success_rate.epoch.env.math": 0.9806013579049466, "success_rate.epoch.env.sat": 0.10927152317880795, "success_rate.epoch.env.science": 0.9613390928725702, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6050432715589963, "success_rate.epoch.global": 0.8599498327759197, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945158850226928, "tokens_p.mean_in_band": 0.60703125, "tokens_rate.above_band": 0.9850968703427719, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014903129657228018 }, { "epoch": 1.819315159574468, "grad_norm": 58.64294875807672, "learning_rate": 1.608344740279961e-07, "loss": 0.1743, "step": 10945, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.4491017964071856, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5294117647058824, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5886075949367089, "success_rate.epoch.env.math": 0.9806201550387597, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9613807982740021, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6050159870540459, "success_rate.epoch.global": 0.8599470973131004, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99140625, "tokens_p.mean_in_band": 0.7513020833333334, "tokens_rate.above_band": 0.9142857142857143, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08571428571428572 }, { "epoch": 1.8201462765957448, "grad_norm": 54.43821157244045, "learning_rate": 1.6082687502509866e-07, "loss": 0.1773, "step": 10950, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5896464646464646, "success_rate.epoch.env.math": 0.9806389157792836, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9613891285591026, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.602196075331399, "success_rate.epoch.global": 0.8597857838364168, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9972001445086706, "tokens_p.mean_in_band": 0.7367838541666667, "tokens_rate.above_band": 0.9787835926449788, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021216407355021217 }, { "epoch": 1.8209773936170213, "grad_norm": 45.255446428147536, "learning_rate": 1.6081931004980082e-07, "loss": 0.2126, "step": 10955, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5901639344262295, "success_rate.epoch.env.math": 0.9806576402321083, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9614224137931034, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.602247846191998, "success_rate.epoch.global": 0.8599027102154274, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979258849557522, "tokens_p.mean_in_band": 0.7408854166666666, "tokens_rate.above_band": 0.9912280701754386, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008771929824561403 }, { "epoch": 1.8218085106382977, "grad_norm": 44.74315336913736, "learning_rate": 1.6081177911552861e-07, "loss": 0.1868, "step": 10960, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5901639344262295, "success_rate.epoch.env.math": 0.9806763285024155, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9614473400818436, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6022518111519115, "success_rate.epoch.global": 0.8599805528545631, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9886275272161742, "tokens_p.mean_in_band": 0.8136488970588235, "tokens_rate.above_band": 0.9497784342688331, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050221565731166914 }, { "epoch": 1.8226396276595744, "grad_norm": 72.03023618372818, "learning_rate": 1.6080428223564754e-07, "loss": 0.2472, "step": 10965, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5901639344262295, "success_rate.epoch.env.math": 0.9806763285024155, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9614722341799398, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6022540742517384, "success_rate.epoch.global": 0.8600388780894196, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968147382920111, "tokens_p.mean_below_band": 1.318767317570746e-10, "tokens_p.mean_in_band": 0.732421875, "tokens_rate.above_band": 0.966711051930759, "tokens_rate.below_band": 0.0013315579227696406, "tokens_rate.in_band": 0.03195739014647137 }, { "epoch": 1.8234707446808511, "grad_norm": 39.520413934515865, "learning_rate": 1.607968194234628e-07, "loss": 0.3274, "step": 10970, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5894206549118388, "success_rate.epoch.env.math": 0.9806763285024155, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9615053763440861, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6021895163108071, "success_rate.epoch.global": 0.8599972249202165, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951603982300885, "tokens_p.mean_in_band": 0.6002604166666666, "tokens_rate.above_band": 0.9658119658119658, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03418803418803419 }, { "epoch": 1.8243018617021276, "grad_norm": 42.713377861446965, "learning_rate": 1.60789390692219e-07, "loss": 0.2894, "step": 10975, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.589937106918239, "success_rate.epoch.env.math": 0.9806763285024155, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9615219260533104, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6022379710122275, "success_rate.epoch.global": 0.8600554785020804, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9916193181818181, "tokens_p.mean_in_band": 0.6215533088235294, "tokens_rate.above_band": 0.9282700421940928, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07172995780590717 }, { "epoch": 1.8251329787234043, "grad_norm": 35.4243584009813, "learning_rate": 1.6078199605510034e-07, "loss": 0.2489, "step": 10980, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5904522613065326, "success_rate.epoch.env.math": 0.9806763285024155, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9615384615384616, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6022863064552678, "success_rate.epoch.global": 0.860113683626785, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9936960542540074, "tokens_p.mean_in_band": 0.541351073434992, "tokens_rate.above_band": 0.8668234288157333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1331765711842668 }, { "epoch": 1.825964095744681, "grad_norm": 37.26417575214121, "learning_rate": 1.6077463552523047e-07, "loss": 0.2052, "step": 10985, "success_rate.epoch.env.abd": 0.6, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5904522613065326, "success_rate.epoch.env.math": 0.9806763285024155, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.9615632381361392, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6022885588732386, "success_rate.epoch.global": 0.8601718403547672, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945488721804512, "tokens_p.mean_in_band": 0.6325120192307693, "tokens_rate.above_band": 0.9808259587020649, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019174041297935103 }, { "epoch": 1.8267952127659575, "grad_norm": 79.18362012473959, "learning_rate": 1.6076730911567255e-07, "loss": 0.2048, "step": 10990, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5909661229611042, "success_rate.epoch.env.math": 0.9806763285024155, "success_rate.epoch.env.sat": 0.10891089108910891, "success_rate.epoch.env.science": 0.961579738141232, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6031272874035639, "success_rate.epoch.global": 0.8602493074792243, "success_rate.window.env.abd": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.000552398989899, "tokens_p.mean_in_band": 0.5372596153846154, "tokens_rate.above_band": 0.9682151589242054, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03178484107579462 }, { "epoch": 1.827626329787234, "grad_norm": 26.11528868489864, "learning_rate": 1.6076001683942918e-07, "loss": 0.2585, "step": 10995, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5914786967418546, "success_rate.epoch.env.math": 0.9806949806949807, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.9616044616044616, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6031129037361921, "success_rate.epoch.global": 0.8601079286010793, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952651515151515, "tokens_p.mean_in_band": 0.5737847222222222, "tokens_rate.above_band": 0.927710843373494, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07228915662650602 }, { "epoch": 1.8284574468085106, "grad_norm": 109.59843766073364, "learning_rate": 1.607527587094423e-07, "loss": 0.2208, "step": 11000, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5085714285714286, "success_rate.epoch.env.logic": 0.5914786967418546, "success_rate.epoch.env.math": 0.9807321772639692, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.9616126956894703, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6031170337956465, "success_rate.epoch.global": 0.8601659751037345, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971223747980614, "tokens_p.mean_in_band": 0.7125, "tokens_rate.above_band": 0.9841017488076311, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01589825119236884 }, { "epoch": 1.8292885638297873, "grad_norm": 18.241799164994678, "learning_rate": 1.6074553473859336e-07, "loss": 0.158, "step": 11005, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5914786967418546, "success_rate.epoch.env.math": 0.9807507218479307, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.9614230604372053, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6033553171705469, "success_rate.epoch.global": 0.860124395300622, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9912883631713555, "tokens_p.mean_in_band": 0.69140625, "tokens_rate.above_band": 0.9898734177215189, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010126582278481013 }, { "epoch": 1.8301196808510638, "grad_norm": 33.298036106035156, "learning_rate": 1.6073834493970314e-07, "loss": 0.1694, "step": 11010, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5914786967418546, "success_rate.epoch.env.math": 0.9807507218479307, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.961456102783726, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6033583210202306, "success_rate.epoch.global": 0.8602016853156513, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9970667870036101, "tokens_p.mean_in_band": 0.6428571428571429, "tokens_rate.above_band": 0.9518900343642611, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.048109965635738834 }, { "epoch": 1.8309507978723403, "grad_norm": 34.81480994694523, "learning_rate": 1.6073118932553174e-07, "loss": 0.3051, "step": 11015, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5919899874843555, "success_rate.epoch.env.math": 0.9807877041306436, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.961472602739726, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6034096640185228, "success_rate.epoch.global": 0.8602981778023192, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9934039792387543, "tokens_p.mean_in_band": 0.6845128676470589, "tokens_rate.above_band": 0.9444444444444444, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05555555555555555 }, { "epoch": 1.831781914893617, "grad_norm": 612.05505873045, "learning_rate": 1.6072406790877867e-07, "loss": 0.239, "step": 11020, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5925, "success_rate.epoch.env.math": 0.980806142034549, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.9615055603079555, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6034607011083208, "success_rate.epoch.global": 0.8604137931034482, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962251529051988, "tokens_p.mean_in_band": 0.752734375, "tokens_rate.above_band": 0.9849397590361446, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015060240963855422 }, { "epoch": 1.8326130319148937, "grad_norm": 51.62770093542465, "learning_rate": 1.6071698070208264e-07, "loss": 0.27, "step": 11025, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5925, "success_rate.epoch.env.math": 0.980806142034549, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.9615466780602435, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6034644390858016, "success_rate.epoch.global": 0.8605099931082012, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9900990099009901, "tokens_p.mean_in_band": 0.8984375, "tokens_rate.above_band": 0.9950738916256158, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0049261083743842365 }, { "epoch": 1.8334441489361701, "grad_norm": 122.07471596513099, "learning_rate": 1.6070992771802177e-07, "loss": 0.2251, "step": 11030, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5925, "success_rate.epoch.env.math": 0.980806142034549, "success_rate.epoch.env.sat": 0.10819672131147541, "success_rate.epoch.env.science": 0.9615959035630467, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.603468914131511, "success_rate.epoch.global": 0.8606252582288941, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.4444444444444444, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9960056390977443, "tokens_p.mean_in_band": 0.7534877232142857, "tokens_rate.above_band": 0.95, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05 }, { "epoch": 1.8342752659574468, "grad_norm": 22.147860824478556, "learning_rate": 1.607029089691133e-07, "loss": 0.1586, "step": 11035, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5925, "success_rate.epoch.env.math": 0.9808245445829339, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.9616286506075463, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6034414200893574, "success_rate.epoch.global": 0.8606027246456585, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9950520833333333, "tokens_p.mean_in_band": 0.7241683467741935, "tokens_rate.above_band": 0.8856088560885609, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.11439114391143912 }, { "epoch": 1.8351063829787235, "grad_norm": 82.00360133905485, "learning_rate": 1.6069592446781388e-07, "loss": 0.247, "step": 11040, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5930087390761548, "success_rate.epoch.env.math": 0.9808429118773946, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.9616776665956994, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6034937948492455, "success_rate.epoch.global": 0.8607560137457044, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965572033898306, "tokens_p.mean_in_band": 0.58935546875, "tokens_rate.above_band": 0.9365079365079365, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06349206349206349 }, { "epoch": 1.8359375, "grad_norm": 37.888279525927594, "learning_rate": 1.6068897422651924e-07, "loss": 0.2422, "step": 11045, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5935162094763092, "success_rate.epoch.env.math": 0.9808429118773946, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.9617102744097, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6035428928687141, "success_rate.epoch.global": 0.8608516483516484, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9952928870292888, "tokens_p.mean_in_band": 0.6259765625, "tokens_rate.above_band": 0.9227799227799228, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07722007722007722 }, { "epoch": 1.8367686170212765, "grad_norm": 333.8432768339014, "learning_rate": 1.6068205825756436e-07, "loss": 0.2513, "step": 11050, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5940224159402242, "success_rate.epoch.env.math": 0.9808612440191388, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.9617184176945981, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6035913184951285, "success_rate.epoch.global": 0.8609089660854043, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.agentgym:textcraft": 0.5, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9965490797546013, "tokens_p.mean_in_band": 0.7579308712121212, "tokens_rate.above_band": 0.9718909710391823, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028109028960817718 }, { "epoch": 1.8375997340425532, "grad_norm": 76.47095503580229, "learning_rate": 1.6067517657322338e-07, "loss": 0.2439, "step": 11055, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5113636363636364, "success_rate.epoch.env.logic": 0.5940224159402242, "success_rate.epoch.env.math": 0.9808795411089866, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.9617509562260943, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6035959399152507, "success_rate.epoch.global": 0.8610043907793633, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9956373292867982, "tokens_p.mean_in_band": 0.67333984375, "tokens_rate.above_band": 0.9880059970014993, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01199400299850075 }, { "epoch": 1.8384308510638299, "grad_norm": 192.78289713320643, "learning_rate": 1.6066832918570961e-07, "loss": 0.2801, "step": 11060, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5141242937853108, "success_rate.epoch.env.logic": 0.5940224159402242, "success_rate.epoch.env.math": 0.9808795411089866, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.9617915516875398, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6038505992682615, "success_rate.epoch.global": 0.8611187277214148, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.9583333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9960801393728222, "tokens_p.mean_below_band": 1.7497114868092467e-13, "tokens_p.mean_in_band": 0.8275669642857143, "tokens_rate.above_band": 0.9728813559322034, "tokens_rate.below_band": 0.003389830508474576, "tokens_rate.in_band": 0.023728813559322035 }, { "epoch": 1.8392619680851063, "grad_norm": 1.2774234209587938, "learning_rate": 1.606615161071755e-07, "loss": 0.2572, "step": 11065, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5141242937853108, "success_rate.epoch.env.logic": 0.5945273631840796, "success_rate.epoch.env.math": 0.9808795411089866, "success_rate.epoch.env.sat": 0.10784313725490197, "success_rate.epoch.env.science": 0.9618401526393895, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6039009218315075, "success_rate.epoch.global": 0.861251883303657, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99375, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.840093085106383, "grad_norm": 89.0119652729704, "learning_rate": 1.606547373497126e-07, "loss": 0.3023, "step": 11070, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44642857142857145, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5141242937853108, "success_rate.epoch.env.logic": 0.5945273631840796, "success_rate.epoch.env.math": 0.9808795411089866, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9618886301079822, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6038733942761634, "success_rate.epoch.global": 0.861247947454844, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9915149006622517, "tokens_p.mean_in_band": 0.725, "tokens_rate.above_band": 0.9096385542168675, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09036144578313253 }, { "epoch": 1.8409242021276597, "grad_norm": 72.14111264105186, "learning_rate": 1.6064799292535151e-07, "loss": 0.142, "step": 11075, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4437869822485207, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5141242937853108, "success_rate.epoch.env.logic": 0.5945273631840796, "success_rate.epoch.env.math": 0.9808795411089866, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9617012272534913, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6036162131821141, "success_rate.epoch.global": 0.8610503282275711, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9974708504801097, "tokens_p.mean_below_band": 2.7830537874251604e-10, "tokens_p.mean_in_band": 0.74375, "tokens_rate.above_band": 0.9851351351351352, "tokens_rate.below_band": 0.0013513513513513514, "tokens_rate.in_band": 0.013513513513513514 }, { "epoch": 1.8417553191489362, "grad_norm": 446.3812188637191, "learning_rate": 1.6064128284606196e-07, "loss": 0.3202, "step": 11080, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4437869822485207, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5141242937853108, "success_rate.epoch.env.logic": 0.5937888198757764, "success_rate.epoch.env.math": 0.9808978032473734, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9617174280879864, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.603552205878894, "success_rate.epoch.global": 0.8609896118097321, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995199692780338, "tokens_p.mean_in_band": 0.6077008928571429, "tokens_rate.above_band": 0.9789473684210527, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021052631578947368 }, { "epoch": 1.8425864361702127, "grad_norm": 60.80709227869134, "learning_rate": 1.6063460712375266e-07, "loss": 0.249, "step": 11085, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4437869822485207, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5141242937853108, "success_rate.epoch.env.logic": 0.5942928039702233, "success_rate.epoch.env.math": 0.9809160305343512, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9617417036567322, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6036018865107278, "success_rate.epoch.global": 0.8610845512908073, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937776548672567, "tokens_p.mean_in_band": 0.7161458333333334, "tokens_rate.above_band": 0.9495798319327731, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05042016806722689 }, { "epoch": 1.8434175531914894, "grad_norm": 78.75678670068612, "learning_rate": 1.6062796577027144e-07, "loss": 0.2917, "step": 11090, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5141242937853108, "success_rate.epoch.env.logic": 0.5935563816604709, "success_rate.epoch.env.math": 0.9809342230695901, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9617578702725544, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6038355027036549, "success_rate.epoch.global": 0.8610428610428611, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982323232323232, "tokens_p.mean_in_band": 0.5229867788461539, "tokens_rate.above_band": 0.9870388833499502, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01296111665004985 }, { "epoch": 1.844248670212766, "grad_norm": 30.982905189855895, "learning_rate": 1.6062135879740507e-07, "loss": 0.1606, "step": 11095, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5141242937853108, "success_rate.epoch.env.logic": 0.5935563816604709, "success_rate.epoch.env.math": 0.9809342230695901, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9617740232312566, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6038369711544461, "success_rate.epoch.global": 0.8610807860262009, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9952664796633941, "tokens_p.mean_below_band": 6.344635039567947e-09, "tokens_p.mean_in_band": 0.6653645833333334, "tokens_rate.above_band": 0.9820936639118457, "tokens_rate.below_band": 0.0013774104683195593, "tokens_rate.in_band": 0.01652892561983471 }, { "epoch": 1.8450797872340425, "grad_norm": 58.56957262009416, "learning_rate": 1.6061478621687922e-07, "loss": 0.3042, "step": 11100, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.594059405940594, "success_rate.epoch.env.math": 0.9809342230695901, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9617901625501373, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6041323168269881, "success_rate.epoch.global": 0.8611565739225314, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9936577490774908, "tokens_p.mean_in_band": 0.888671875, "tokens_rate.above_band": 0.9963235294117647, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003676470588235294 }, { "epoch": 1.845910904255319, "grad_norm": 32.52784351638375, "learning_rate": 1.6060824804035872e-07, "loss": 0.16, "step": 11105, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5945611866501854, "success_rate.epoch.env.math": 0.9809342230695901, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9616114743724953, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6041616888753472, "success_rate.epoch.global": 0.8611148970969061, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4523809523809524, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9954651680391721, "tokens_p.mean_below_band": 2.9976945370435715e-09, "tokens_p.mean_in_band": 0.5656887755102041, "tokens_rate.above_band": 0.9014847512038523, "tokens_rate.below_band": 0.00020064205457463884, "tokens_rate.in_band": 0.09831460674157304 }, { "epoch": 1.8467420212765957, "grad_norm": 16.01149184544124, "learning_rate": 1.6060174427944715e-07, "loss": 0.2007, "step": 11110, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5955610357583231, "success_rate.epoch.env.math": 0.9809523809523809, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9616357504215851, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6042564418789852, "success_rate.epoch.global": 0.8612283807708021, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923155737704918, "tokens_p.mean_in_band": 0.75, "tokens_rate.above_band": 0.9807073954983923, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01929260450160772 }, { "epoch": 1.8475731382978724, "grad_norm": 257.7137592163602, "learning_rate": 1.6059527494568714e-07, "loss": 0.3892, "step": 11115, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5960591133004927, "success_rate.epoch.env.math": 0.9809523809523809, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9616438356164384, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.60430245667326, "success_rate.epoch.global": 0.8612661674608577, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9977302922940655, "tokens_p.mean_in_band": 0.4939152644230769, "tokens_rate.above_band": 0.9774891774891775, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.022510822510822513 }, { "epoch": 1.8484042553191489, "grad_norm": 71.67974653821472, "learning_rate": 1.6058884005056018e-07, "loss": 0.3051, "step": 11120, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4470588235294118, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5960591133004927, "success_rate.epoch.env.math": 0.9809523809523809, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.961676142345757, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6043053936486525, "success_rate.epoch.global": 0.861341679140019, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988924050632911, "tokens_p.mean_in_band": 0.6510416666666666, "tokens_rate.above_band": 0.9850374064837906, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014962593516209476 }, { "epoch": 1.8492353723404256, "grad_norm": 63.31737710973163, "learning_rate": 1.6058243960548658e-07, "loss": 0.3134, "step": 11125, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5953259532595326, "success_rate.epoch.env.math": 0.9809885931558935, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.961692275310461, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6037709233499365, "success_rate.epoch.global": 0.8610657966286025, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9968039772727273, "tokens_p.mean_in_band": 0.66796875, "tokens_rate.above_band": 0.9832402234636871, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01675977653631285 }, { "epoch": 1.8500664893617023, "grad_norm": 94.36367572639328, "learning_rate": 1.6057607362182562e-07, "loss": 0.2235, "step": 11130, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5953259532595326, "success_rate.epoch.env.math": 0.9809885931558935, "success_rate.epoch.env.sat": 0.10749185667752444, "success_rate.epoch.env.science": 0.9617245005257623, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.603773852914964, "success_rate.epoch.global": 0.8611413043478261, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9966738861386139, "tokens_p.mean_in_band": 0.6981026785714286, "tokens_rate.above_band": 0.9914110429447853, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008588957055214725 }, { "epoch": 1.8508976063829787, "grad_norm": 21.907886986195273, "learning_rate": 1.605697421108753e-07, "loss": 0.2361, "step": 11135, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5958230958230958, "success_rate.epoch.env.math": 0.9810066476733144, "success_rate.epoch.env.sat": 0.10714285714285714, "success_rate.epoch.env.science": 0.9617486338797814, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6037911557240854, "success_rate.epoch.global": 0.8611186532717893, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9955645161290323, "tokens_p.mean_in_band": 0.635546875, "tokens_rate.above_band": 0.950920245398773, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.049079754601226995 }, { "epoch": 1.8517287234042552, "grad_norm": 46.32505353708447, "learning_rate": 1.6056344508387257e-07, "loss": 0.3269, "step": 11140, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5958230958230958, "success_rate.epoch.env.math": 0.9810066476733144, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9615788368675204, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6037441977558795, "success_rate.epoch.global": 0.8609603906673902, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.5833333333333334, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9955673758865248, "tokens_p.mean_below_band": 3.5943231776330853e-09, "tokens_p.mean_in_band": 0.5859375, "tokens_rate.above_band": 0.9644322845417237, "tokens_rate.below_band": 0.0027359781121751026, "tokens_rate.in_band": 0.03283173734610123 }, { "epoch": 1.852559840425532, "grad_norm": 122.98574198193228, "learning_rate": 1.6055718255199305e-07, "loss": 0.33, "step": 11145, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.596319018404908, "success_rate.epoch.env.math": 0.9810246679316889, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9613932018464121, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6037740439212503, "success_rate.epoch.global": 0.8609190727938186, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8, "success_rate.window.env_macro_mean": 0.76, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9975453172205438, "tokens_p.mean_below_band": 3.4051481634378433e-09, "tokens_p.mean_in_band": 0.6827651515151515, "tokens_rate.above_band": 0.9068493150684932, "tokens_rate.below_band": 0.0027397260273972603, "tokens_rate.in_band": 0.09041095890410959 }, { "epoch": 1.8533909574468086, "grad_norm": 44.922002995643176, "learning_rate": 1.6055095452635126e-07, "loss": 0.207, "step": 11150, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5955882352941176, "success_rate.epoch.env.math": 0.9810785241248817, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9614093959731543, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6037139773038999, "success_rate.epoch.global": 0.8609154929577465, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989804964539007, "tokens_p.mean_in_band": 0.5790178571428571, "tokens_rate.above_band": 0.9757785467128027, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02422145328719723 }, { "epoch": 1.854222074468085, "grad_norm": 24.858581889020314, "learning_rate": 1.6054476101800034e-07, "loss": 0.2904, "step": 11155, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5955882352941176, "success_rate.epoch.env.math": 0.9810964083175804, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9614255765199161, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6037170740983963, "success_rate.epoch.global": 0.8609719777988358, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9925223214285714, "tokens_p.mean_in_band": 0.71484375, "tokens_rate.above_band": 0.9893992932862191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01060070671378092 }, { "epoch": 1.8550531914893615, "grad_norm": 111.33303799431422, "learning_rate": 1.6053860203793232e-07, "loss": 0.2037, "step": 11160, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5960832313341493, "success_rate.epoch.env.math": 0.9810964083175804, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9614659685863874, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.603765745744442, "success_rate.epoch.global": 0.8610848099553632, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969967532467533, "tokens_p.mean_in_band": 0.5687040441176471, "tokens_rate.above_band": 0.9783989834815756, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021601016518424398 }, { "epoch": 1.8558843085106385, "grad_norm": 77.84022231131404, "learning_rate": 1.6053247759707787e-07, "loss": 0.225, "step": 11165, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5965770171149144, "success_rate.epoch.env.math": 0.9811142587346553, "success_rate.epoch.env.sat": 0.10679611650485436, "success_rate.epoch.env.science": 0.9614740368509213, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6038129916046577, "success_rate.epoch.global": 0.8611411573823688, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9903748647917794, "tokens_p.mean_below_band": 6.258487701416016e-07, "tokens_p.mean_in_band": 0.5283040364583333, "tokens_rate.above_band": 0.8366515837104073, "tokens_rate.below_band": 0.00045248868778280545, "tokens_rate.in_band": 0.16289592760180996 }, { "epoch": 1.856715425531915, "grad_norm": 28.457786221125268, "learning_rate": 1.6052638770630633e-07, "loss": 0.2012, "step": 11170, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5970695970695971, "success_rate.epoch.env.math": 0.9811320754716981, "success_rate.epoch.env.sat": 0.1064516129032258, "success_rate.epoch.env.science": 0.9615143275465384, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6038317355851772, "success_rate.epoch.global": 0.8611561318206374, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9942660550458715, "tokens_p.mean_in_band": 0.620361328125, "tokens_rate.above_band": 0.9316239316239316, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06837606837606838 }, { "epoch": 1.8575465425531914, "grad_norm": 202.00326093856438, "learning_rate": 1.605203323764258e-07, "loss": 0.2977, "step": 11175, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5970695970695971, "success_rate.epoch.env.math": 0.9811320754716981, "success_rate.epoch.env.sat": 0.1064516129032258, "success_rate.epoch.env.science": 0.9615223755750731, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.603832467224135, "success_rate.epoch.global": 0.8611748818365969, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.2916666666666667, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9949583333333333, "tokens_p.mean_below_band": 9.549694368615746e-12, "tokens_p.mean_in_band": 0.4678308823529412, "tokens_rate.above_band": 0.9765625, "tokens_rate.below_band": 0.0013020833333333333, "tokens_rate.in_band": 0.022135416666666668 }, { "epoch": 1.858377659574468, "grad_norm": 26.559728022777445, "learning_rate": 1.60514311618183e-07, "loss": 0.2024, "step": 11180, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5168539325842697, "success_rate.epoch.env.logic": 0.5975609756097561, "success_rate.epoch.env.math": 0.981203007518797, "success_rate.epoch.env.sat": 0.1064516129032258, "success_rate.epoch.env.science": 0.9615384615384616, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6038850487287392, "success_rate.epoch.global": 0.8613059902860226, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9947465195692146, "tokens_p.mean_in_band": 0.5190096263277694, "tokens_rate.above_band": 0.8524406627854904, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14755933721450962 }, { "epoch": 1.8592087765957448, "grad_norm": 71.0598117766657, "learning_rate": 1.605083254422633e-07, "loss": 0.2666, "step": 11185, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5139664804469274, "success_rate.epoch.env.logic": 0.5975609756097561, "success_rate.epoch.env.math": 0.9812206572769953, "success_rate.epoch.env.sat": 0.1064516129032258, "success_rate.epoch.env.science": 0.9615705931495405, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6036270786589151, "success_rate.epoch.global": 0.8612833647883527, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9926447451227187, "tokens_p.mean_below_band": 2.2032431193760464e-07, "tokens_p.mean_in_band": 0.4401719252950644, "tokens_rate.above_band": 0.771921301918873, "tokens_rate.below_band": 0.0017002671848433326, "tokens_rate.in_band": 0.2263784308962837 }, { "epoch": 1.8600398936170213, "grad_norm": 17.965782810363788, "learning_rate": 1.6050237385929072e-07, "loss": 0.286, "step": 11190, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5111111111111111, "success_rate.epoch.env.logic": 0.5980511571254568, "success_rate.epoch.env.math": 0.9812382739212008, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9615786176654834, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6057756199924909, "success_rate.epoch.global": 0.8611260775862069, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9976018396846255, "tokens_p.mean_below_band": 2.5331974029541016e-07, "tokens_p.mean_in_band": 0.5321607268258427, "tokens_rate.above_band": 0.9140043238049483, "tokens_rate.below_band": 0.00048042277203939464, "tokens_rate.in_band": 0.08551525342301225 }, { "epoch": 1.8608710106382977, "grad_norm": 60.293293328870845, "learning_rate": 1.6049645687982784e-07, "loss": 0.2481, "step": 11195, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5111111111111111, "success_rate.epoch.env.logic": 0.5985401459854015, "success_rate.epoch.env.math": 0.9812558575445174, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9616026711185309, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058238587139736, "success_rate.epoch.global": 0.8612195450262485, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9936538461538461, "tokens_p.mean_in_band": 0.58125, "tokens_rate.above_band": 0.9154929577464789, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08450704225352113 }, { "epoch": 1.8617021276595744, "grad_norm": 100.32767598264614, "learning_rate": 1.6049057451437579e-07, "loss": 0.2486, "step": 11200, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5111111111111111, "success_rate.epoch.env.logic": 0.5985401459854015, "success_rate.epoch.env.math": 0.9812734082397003, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9616186900292032, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.605826910496324, "success_rate.epoch.global": 0.861275565123789, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990727002967359, "tokens_p.mean_in_band": 0.7513020833333334, "tokens_rate.above_band": 0.9825072886297376, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01749271137026239 }, { "epoch": 1.8625332446808511, "grad_norm": 48.68398984170237, "learning_rate": 1.6048472677337444e-07, "loss": 0.2288, "step": 11205, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5111111111111111, "success_rate.epoch.env.logic": 0.5985401459854015, "success_rate.epoch.env.math": 0.9812734082397003, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9616426933500104, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058290926163974, "success_rate.epoch.global": 0.8613315400134499, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.6875, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9971915121255349, "tokens_p.mean_below_band": 2.514570951461792e-08, "tokens_p.mean_in_band": 0.5790441176470589, "tokens_rate.above_band": 0.9749652294853964, "tokens_rate.below_band": 0.0013908205841446453, "tokens_rate.in_band": 0.02364394993045897 }, { "epoch": 1.8633643617021276, "grad_norm": 162.5659880122399, "learning_rate": 1.6047891366720202e-07, "loss": 0.2419, "step": 11210, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5111111111111111, "success_rate.epoch.env.logic": 0.5990279465370595, "success_rate.epoch.env.math": 0.9812909260991581, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9616746511143511, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058779359050752, "success_rate.epoch.global": 0.8614433543878511, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965234375, "tokens_p.mean_below_band": 1.6589183360338211e-09, "tokens_p.mean_in_band": 0.746875, "tokens_rate.above_band": 0.9918175055789735, "tokens_rate.below_band": 0.0007438631291842301, "tokens_rate.in_band": 0.007438631291842301 }, { "epoch": 1.8641954787234043, "grad_norm": 32.32806014290728, "learning_rate": 1.604731352061754e-07, "loss": 0.18, "step": 11215, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5138121546961326, "success_rate.epoch.env.logic": 0.5990279465370595, "success_rate.epoch.env.math": 0.9813084112149533, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9617145235122764, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6061286996413244, "success_rate.epoch.global": 0.8615735767991407, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9936655405405406, "tokens_p.mean_in_band": 0.87890625, "tokens_rate.above_band": 0.9966329966329966, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003367003367003367 }, { "epoch": 1.865026595744681, "grad_norm": 86.18835233329912, "learning_rate": 1.604673914005499e-07, "loss": 0.3505, "step": 11220, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5138121546961326, "success_rate.epoch.env.logic": 0.5995145631067961, "success_rate.epoch.env.math": 0.9813606710158435, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9617384071532543, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6061798596423794, "success_rate.epoch.global": 0.8617035546613011, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9898574561403509, "tokens_p.mean_in_band": 0.729375, "tokens_rate.above_band": 0.9011857707509882, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09881422924901186 }, { "epoch": 1.8658577127659575, "grad_norm": 198.82987104670391, "learning_rate": 1.6046168226051942e-07, "loss": 0.2742, "step": 11225, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.4418604651162791, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5138121546961326, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.9813780260707635, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.961770205692915, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6062284587776327, "success_rate.epoch.global": 0.8618147701380512, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9990079365079365, "tokens_p.mean_in_band": 0.69384765625, "tokens_rate.above_band": 0.9895287958115183, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010471204188481676 }, { "epoch": 1.866688829787234, "grad_norm": 186.0056977156713, "learning_rate": 1.6045600779621625e-07, "loss": 0.2211, "step": 11230, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44508670520231214, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5138121546961326, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.9813953488372092, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9618019514220469, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6065262141032336, "success_rate.epoch.global": 0.8619258068836213, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9996056050482554, "tokens_p.mean_in_band": 0.6617838541666666, "tokens_rate.above_band": 0.9739696312364425, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026030368763557483 }, { "epoch": 1.8675199468085106, "grad_norm": 65.54784536311597, "learning_rate": 1.6045036801771116e-07, "loss": 0.1235, "step": 11235, "success_rate.epoch.env.abd": 0.6086956521739131, "success_rate.epoch.env.agentgym:alfworld": 0.44508670520231214, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5138121546961326, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.9814126394052045, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9618415595188718, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6065313867091263, "success_rate.epoch.global": 0.862036665328516, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9861111111111112, "tokens_p.mean_in_band": 0.8697916666666666, "tokens_rate.above_band": 0.972972972972973, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02702702702702703 }, { "epoch": 1.8683510638297873, "grad_norm": 200.75192461161555, "learning_rate": 1.604447629350134e-07, "loss": 0.208, "step": 11240, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44508670520231214, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5138121546961326, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.9814126394052045, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9618810855603894, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6053576189562779, "success_rate.epoch.global": 0.8620136381869233, "success_rate.window.env.abd": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959706303724928, "tokens_p.mean_in_band": 0.10093788735667802, "tokens_rate.above_band": 0.1778343949044586, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.8221656050955414 }, { "epoch": 1.8691821808510638, "grad_norm": 23.235407406377817, "learning_rate": 1.6043919255807063e-07, "loss": 0.1883, "step": 11245, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44508670520231214, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5138121546961326, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.9814126394052045, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9619047619047619, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.605359771351221, "success_rate.epoch.global": 0.8620689655172413, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9995725034199726, "tokens_p.mean_in_band": 0.696533203125, "tokens_rate.above_band": 0.9785809906291834, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0214190093708166 }, { "epoch": 1.8700132978723403, "grad_norm": 30.66713997302514, "learning_rate": 1.6043365689676888e-07, "loss": 0.2502, "step": 11250, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44508670520231214, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5138121546961326, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.9814643188137164, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9619205298013245, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6053659029244095, "success_rate.epoch.global": 0.8621610792039536, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9802631578947368, "tokens_p.mean_in_band": 0.849609375, "tokens_rate.above_band": 0.9884393063583815, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011560693641618497 }, { "epoch": 1.870844414893617, "grad_norm": 67.76901205467126, "learning_rate": 1.6042815596093259e-07, "loss": 0.3457, "step": 11255, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44508670520231214, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5138121546961326, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.9814814814814815, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9619598924953483, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6053710415936631, "success_rate.epoch.global": 0.862271453356466, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9842641843971631, "tokens_p.mean_below_band": 8.003553375601768e-11, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9791666666666666, "tokens_rate.below_band": 0.006944444444444444, "tokens_rate.in_band": 0.013888888888888888 }, { "epoch": 1.8716755319148937, "grad_norm": 56.42043560569765, "learning_rate": 1.6042268976032464e-07, "loss": 0.2009, "step": 11260, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.4482758620689655, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.599758162031439, "success_rate.epoch.env.math": 0.9814814814814815, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9619834710743802, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058839742540729, "success_rate.epoch.global": 0.8622666666666666, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9166666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9969491332116789, "tokens_p.mean_in_band": 0.5971354166666667, "tokens_rate.above_band": 0.9864986498649865, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013501350135013501 }, { "epoch": 1.8725066489361701, "grad_norm": 95.31555178768237, "learning_rate": 1.6041725830464608e-07, "loss": 0.2012, "step": 11265, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44571428571428573, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6007237635705669, "success_rate.epoch.env.math": 0.9814986123959297, "success_rate.epoch.env.sat": 0.10610932475884244, "success_rate.epoch.env.science": 0.9619913241065895, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6057411569023553, "success_rate.epoch.global": 0.8622251832111926, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.996482683982684, "tokens_p.mean_in_band": 0.6136300223214286, "tokens_rate.above_band": 0.9705882352941176, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029411764705882353 }, { "epoch": 1.8733377659574468, "grad_norm": 107.83403861715247, "learning_rate": 1.6041186160353648e-07, "loss": 0.2631, "step": 11270, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44571428571428573, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6007237635705669, "success_rate.epoch.env.math": 0.9814986123959297, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.9620070204418749, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6057116662065074, "success_rate.epoch.global": 0.8621470431539691, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949404761904762, "tokens_p.mean_in_band": 0.6298828125, "tokens_rate.above_band": 0.9292035398230089, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07079646017699115 }, { "epoch": 1.8741688829787235, "grad_norm": 29.44704603586585, "learning_rate": 1.604064996665737e-07, "loss": 0.2217, "step": 11275, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44571428571428573, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6007237635705669, "success_rate.epoch.env.math": 0.9815157116451017, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.9620462046204621, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6057167828817582, "success_rate.epoch.global": 0.8622571200425871, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9886744966442953, "tokens_p.mean_in_band": 0.869140625, "tokens_rate.above_band": 0.9867549668874173, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013245033112582781 }, { "epoch": 1.875, "grad_norm": 47.11825716294459, "learning_rate": 1.6040117250327378e-07, "loss": 0.236, "step": 11280, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6007237635705669, "success_rate.epoch.env.math": 0.981549815498155, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.962061855670103, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6060076106592169, "success_rate.epoch.global": 0.8623487165846522, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979104291417166, "tokens_p.mean_in_band": 0.6842672413793104, "tokens_rate.above_band": 0.9718719689621726, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.028128031037827354 }, { "epoch": 1.8758311170212765, "grad_norm": 62.69377819187668, "learning_rate": 1.603958801230912e-07, "loss": 0.275, "step": 11285, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6007237635705669, "success_rate.epoch.env.math": 0.981549815498155, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.9620853080568721, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6060097426943777, "success_rate.epoch.global": 0.8624036160595586, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9853987068965517, "tokens_p.mean_in_band": 0.85302734375, "tokens_rate.above_band": 0.9863945578231292, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013605442176870748 }, { "epoch": 1.8766622340425532, "grad_norm": 89.13788747062225, "learning_rate": 1.6039062253541858e-07, "loss": 0.2578, "step": 11290, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6, "success_rate.epoch.env.math": 0.9815837937384899, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.9621087314662273, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6059491643379343, "success_rate.epoch.global": 0.8623804463336876, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9972696799307958, "tokens_p.mean_in_band": 0.5565518465909091, "tokens_rate.above_band": 0.9633333333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03666666666666667 }, { "epoch": 1.8774933510638299, "grad_norm": 58.58675618014322, "learning_rate": 1.603853997495869e-07, "loss": 0.3193, "step": 11295, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.5992779783393501, "success_rate.epoch.env.math": 0.9816007359705612, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.9621243310004117, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058864843475349, "success_rate.epoch.global": 0.8623207647371216, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9941793893129771, "tokens_p.mean_in_band": 0.5371942934782609, "tokens_rate.above_band": 0.9660766961651918, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03392330383480826 }, { "epoch": 1.8783244680851063, "grad_norm": 85.73088210912971, "learning_rate": 1.603802117748653e-07, "loss": 0.3214, "step": 11300, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.5992779783393501, "success_rate.epoch.env.math": 0.9816007359705612, "success_rate.epoch.env.sat": 0.10576923076923077, "success_rate.epoch.env.science": 0.9621554915672563, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058893171263389, "success_rate.epoch.global": 0.8623938428874734, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9936868686868687, "tokens_p.mean_in_band": 0.6192555147058824, "tokens_rate.above_band": 0.9209302325581395, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07906976744186046 }, { "epoch": 1.879155585106383, "grad_norm": 8.608370218210926, "learning_rate": 1.6037505862046114e-07, "loss": 0.1815, "step": 11305, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6002400960384153, "success_rate.epoch.env.math": 0.9816176470588235, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9621788283658788, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6059497211918727, "success_rate.epoch.global": 0.862388969905873, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959239130434783, "tokens_p.mean_in_band": 0.6294270833333333, "tokens_rate.above_band": 0.9608355091383812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0391644908616188 }, { "epoch": 1.8799867021276597, "grad_norm": 103.69552313615101, "learning_rate": 1.603699402955201e-07, "loss": 0.4362, "step": 11310, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6007194244604317, "success_rate.epoch.env.math": 0.9816345270890725, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9622021364009861, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6059969499634522, "success_rate.epoch.global": 0.8624801271860095, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.9523809523809524, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9931768558951966, "tokens_p.mean_below_band": 5.20230969414115e-10, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9870689655172413, "tokens_rate.below_band": 0.004310344827586207, "tokens_rate.in_band": 0.008620689655172414 }, { "epoch": 1.8808178191489362, "grad_norm": 130.46109651089287, "learning_rate": 1.6036485680912584e-07, "loss": 0.3604, "step": 11315, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6007194244604317, "success_rate.epoch.env.math": 0.981651376146789, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9622021364009861, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6059984816959717, "success_rate.epoch.global": 0.8624983441515433, "success_rate.window.env.logic": 0.25, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.998152359295054, "tokens_p.mean_in_band": 0.6106770833333334, "tokens_rate.above_band": 0.9865395401009535, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01346045989904655 }, { "epoch": 1.8816489361702127, "grad_norm": 326.2092933475463, "learning_rate": 1.6035980817030035e-07, "loss": 0.2834, "step": 11320, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6007194244604317, "success_rate.epoch.env.math": 0.9816849816849816, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9622098993633189, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6060022424687468, "success_rate.epoch.global": 0.862552966101695, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.6190476190476191, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9948863636363636, "tokens_p.mean_below_band": 1.4915713109076023e-10, "tokens_p.mean_in_band": 0.6549479166666666, "tokens_rate.above_band": 0.990990990990991, "tokens_rate.below_band": 0.0022522522522522522, "tokens_rate.in_band": 0.006756756756756757 }, { "epoch": 1.8824800531914894, "grad_norm": 127.88686231043508, "learning_rate": 1.6035479438800375e-07, "loss": 0.3034, "step": 11325, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6007194244604317, "success_rate.epoch.env.math": 0.9817184643510055, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9622254157257236, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6060066969258767, "success_rate.epoch.global": 0.8626257278983589, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9989740082079344, "tokens_p.mean_in_band": 0.6335227272727273, "tokens_rate.above_band": 0.9707835325365206, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029216467463479414 }, { "epoch": 1.883311170212766, "grad_norm": 60.87590487575993, "learning_rate": 1.6034981547113425e-07, "loss": 0.1766, "step": 11330, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6011976047904192, "success_rate.epoch.env.math": 0.9817184643510055, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9622409193515288, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6060515772854942, "success_rate.epoch.global": 0.8626802487101468, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984217171717171, "tokens_p.mean_in_band": 0.7094089673913043, "tokens_rate.above_band": 0.9717791411042945, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02822085889570552 }, { "epoch": 1.8841422872340425, "grad_norm": 97.77985017579189, "learning_rate": 1.603448714285282e-07, "loss": 0.2305, "step": 11335, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6016746411483254, "success_rate.epoch.env.math": 0.9817184643510055, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9622641509433962, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6060970561900191, "success_rate.epoch.global": 0.8627528758429195, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9933986371379898, "tokens_p.mean_in_band": 0.7799479166666666, "tokens_rate.above_band": 0.9949152542372881, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005084745762711864 }, { "epoch": 1.884973404255319, "grad_norm": 103.29748197465557, "learning_rate": 1.603399622689601e-07, "loss": 0.3439, "step": 11340, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6014319809069213, "success_rate.epoch.env.math": 0.9817184643510055, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9622796227962279, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.606076402700149, "success_rate.epoch.global": 0.8626932734240782, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9949142156862745, "tokens_p.mean_in_band": 0.5248825328162291, "tokens_rate.above_band": 0.9068681929317626, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09313180706823738 }, { "epoch": 1.8858045212765957, "grad_norm": 52.62326960446603, "learning_rate": 1.6033508800114245e-07, "loss": 0.1612, "step": 11345, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6014319809069213, "success_rate.epoch.env.math": 0.9817184643510055, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.962318246979316, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6060799139895205, "success_rate.epoch.global": 0.862783940834654, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957896706586826, "tokens_p.mean_in_band": 0.826171875, "tokens_rate.above_band": 0.9940476190476191, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005952380952380952 }, { "epoch": 1.8866356382978724, "grad_norm": 53.55374667437642, "learning_rate": 1.6033024863372592e-07, "loss": 0.2834, "step": 11350, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6011904761904762, "success_rate.epoch.env.math": 0.9817184643510055, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9623336745138178, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6060593615184348, "success_rate.epoch.global": 0.8627243928194298, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777778, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9967575571895425, "tokens_p.mean_in_band": 0.6988636363636364, "tokens_rate.above_band": 0.9823434991974318, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01765650080256822 }, { "epoch": 1.8874667553191489, "grad_norm": 47.18018054912582, "learning_rate": 1.603254441752992e-07, "loss": 0.2352, "step": 11355, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5164835164835165, "success_rate.epoch.env.logic": 0.6011904761904762, "success_rate.epoch.env.math": 0.9817351598173516, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9623567921440261, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6060629808908488, "success_rate.epoch.global": 0.862796833773087, "success_rate.window.env.abd": 1.0, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9967873831775701, "tokens_p.mean_in_band": 0.673828125, "tokens_rate.above_band": 0.9938080495356038, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006191950464396285 }, { "epoch": 1.8882978723404256, "grad_norm": 40.45229193401803, "learning_rate": 1.6032067463438904e-07, "loss": 0.2188, "step": 11360, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5136612021857924, "success_rate.epoch.env.logic": 0.6016646848989299, "success_rate.epoch.env.math": 0.9817518248175182, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9623875715453802, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058538298737806, "success_rate.epoch.global": 0.8627916172400159, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9984302325581396, "tokens_p.mean_in_band": 0.6436941964285714, "tokens_rate.above_band": 0.9808394160583942, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01916058394160584 }, { "epoch": 1.8891289893617023, "grad_norm": 26.806448069486844, "learning_rate": 1.6031594001946017e-07, "loss": 0.235, "step": 11365, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5136612021857924, "success_rate.epoch.env.logic": 0.6016646848989299, "success_rate.epoch.env.math": 0.9817518248175182, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9624029423784226, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058552272222392, "success_rate.epoch.global": 0.8628277770457241, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974573490813649, "tokens_p.mean_in_band": 0.3175536303630363, "tokens_rate.above_band": 0.7154929577464789, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.28450704225352114 }, { "epoch": 1.8899601063829787, "grad_norm": 89.31725038185034, "learning_rate": 1.603112403389154e-07, "loss": 0.1634, "step": 11370, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5136612021857924, "success_rate.epoch.env.logic": 0.6016646848989299, "success_rate.epoch.env.math": 0.9817850637522769, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9624183006535948, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058596451504147, "success_rate.epoch.global": 0.8629000395100751, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974279835390947, "tokens_p.mean_in_band": 0.7064732142857143, "tokens_rate.above_band": 0.9886086248982913, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011391375101708706 }, { "epoch": 1.8907912234042552, "grad_norm": 62.86644434378806, "learning_rate": 1.6030657560109545e-07, "loss": 0.3502, "step": 11375, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5136612021857924, "success_rate.epoch.env.logic": 0.6016646848989299, "success_rate.epoch.env.math": 0.9817850637522769, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9624336463862802, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058610402170224, "success_rate.epoch.global": 0.862936142198815, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9968387230514096, "tokens_p.mean_in_band": 0.6637620192307693, "tokens_rate.above_band": 0.9893355209187858, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01066447908121411 }, { "epoch": 1.891622340425532, "grad_norm": 0.0, "learning_rate": 1.603019458142792e-07, "loss": 0.2206, "step": 11380, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.44886363636363635, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5136612021857924, "success_rate.epoch.env.logic": 0.6016646848989299, "success_rate.epoch.env.math": 0.9818016378525932, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9624566415017344, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058646374184561, "success_rate.epoch.global": 0.863008290564548, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959580838323353, "tokens_p.mean_in_band": 0.5863970588235294, "tokens_rate.above_band": 0.9800469483568075, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01995305164319249 }, { "epoch": 1.8924534574468086, "grad_norm": 49.852831788625586, "learning_rate": 1.6029735098668332e-07, "loss": 0.2803, "step": 11385, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.4519774011299435, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5108695652173914, "success_rate.epoch.env.logic": 0.6016646848989299, "success_rate.epoch.env.math": 0.9818016378525932, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9624719559453396, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058953139858662, "success_rate.epoch.global": 0.8629488359857951, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9951102418207681, "tokens_p.mean_in_band": 0.7209821428571429, "tokens_rate.above_band": 0.9525745257452575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04742547425474255 }, { "epoch": 1.893284574468085, "grad_norm": 80.8369620975784, "learning_rate": 1.6029279112646247e-07, "loss": 0.2365, "step": 11390, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.4519774011299435, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5108695652173914, "success_rate.epoch.env.logic": 0.6016646848989299, "success_rate.epoch.env.math": 0.9818346957311535, "success_rate.epoch.env.sat": 0.10543130990415335, "success_rate.epoch.env.science": 0.9624796084828712, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6058990149327836, "success_rate.epoch.global": 0.8630028924533263, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8888888888888888, "tokens_p.mean_above_band": 0.9992982784431138, "tokens_p.mean_in_band": 0.7046875, "tokens_rate.above_band": 0.9852507374631269, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014749262536873156 }, { "epoch": 1.8941156914893615, "grad_norm": 141.69365597165583, "learning_rate": 1.6028826624170935e-07, "loss": 0.1629, "step": 11395, "success_rate.epoch.env.abd": 0.5957446808510638, "success_rate.epoch.env.agentgym:alfworld": 0.4519774011299435, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5108695652173914, "success_rate.epoch.env.logic": 0.6021377672209026, "success_rate.epoch.env.math": 0.9818676337262012, "success_rate.epoch.env.sat": 0.10509554140127389, "success_rate.epoch.env.science": 0.9625101874490628, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6059172722764502, "success_rate.epoch.global": 0.8630154977672708, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99292071197411, "tokens_p.mean_in_band": 0.6747532894736842, "tokens_rate.above_band": 0.9420731707317073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.057926829268292686 }, { "epoch": 1.8949468085106385, "grad_norm": 48.5536778056079, "learning_rate": 1.6028377634045448e-07, "loss": 0.1333, "step": 11400, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4519774011299435, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5108695652173914, "success_rate.epoch.env.logic": 0.6021377672209026, "success_rate.epoch.env.math": 0.9818840579710145, "success_rate.epoch.env.sat": 0.10476190476190476, "success_rate.epoch.env.science": 0.9625330889839137, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6066561518178956, "success_rate.epoch.global": 0.862992125984252, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967384708737864, "tokens_p.mean_in_band": 0.74609375, "tokens_rate.above_band": 0.9903846153846154, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009615384615384616 }, { "epoch": 1.895777925531915, "grad_norm": 98.79742863293252, "learning_rate": 1.6027932143066636e-07, "loss": 0.2673, "step": 11405, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4550561797752809, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5108695652173914, "success_rate.epoch.env.logic": 0.6018957345971564, "success_rate.epoch.env.math": 0.9818840579710145, "success_rate.epoch.env.sat": 0.10476190476190476, "success_rate.epoch.env.science": 0.9625483411357623, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6069154243791173, "success_rate.epoch.global": 0.8629508196721312, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9982861060329068, "tokens_p.mean_in_band": 0.6682942708333334, "tokens_rate.above_band": 0.9785330948121646, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02146690518783542 }, { "epoch": 1.8966090425531914, "grad_norm": 129.38304186776782, "learning_rate": 1.6027490152025135e-07, "loss": 0.2258, "step": 11410, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4550561797752809, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5108695652173914, "success_rate.epoch.env.logic": 0.6023668639053255, "success_rate.epoch.env.math": 0.9818840579710145, "success_rate.epoch.env.sat": 0.10476190476190476, "success_rate.epoch.env.science": 0.9625788082163921, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6069610240508263, "success_rate.epoch.global": 0.8630406290956749, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9935109289617486, "tokens_p.mean_in_band": 0.8151041666666666, "tokens_rate.above_band": 0.9838709677419355, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.016129032258064516 }, { "epoch": 1.897440159574468, "grad_norm": 32.149604529790686, "learning_rate": 1.6027051661705372e-07, "loss": 0.2143, "step": 11415, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4550561797752809, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5108695652173914, "success_rate.epoch.env.logic": 0.6023668639053255, "success_rate.epoch.env.math": 0.9818840579710145, "success_rate.epoch.env.sat": 0.10725552050473186, "success_rate.epoch.env.science": 0.9626092257671205, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6071904816229677, "success_rate.epoch.global": 0.8630172865374541, "success_rate.window.env.abd": 1.0, "success_rate.window.env.sat": 0.5, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9870383522727273, "tokens_p.mean_in_band": 0.7102864583333334, "tokens_rate.above_band": 0.9361702127659575, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06382978723404255 }, { "epoch": 1.8982712765957448, "grad_norm": 22.914581354953942, "learning_rate": 1.6026616672885558e-07, "loss": 0.2863, "step": 11420, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4550561797752809, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081081081081081, "success_rate.epoch.env.logic": 0.6023668639053255, "success_rate.epoch.env.math": 0.9819004524886877, "success_rate.epoch.env.sat": 0.1069182389937107, "success_rate.epoch.env.science": 0.9626092257671205, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6069102685227286, "success_rate.epoch.global": 0.8628092682288258, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9847608024691358, "tokens_p.mean_in_band": 0.7594039351851852, "tokens_rate.above_band": 0.8888888888888888, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1111111111111111 }, { "epoch": 1.8991023936170213, "grad_norm": 90.45965953292954, "learning_rate": 1.6026185186337692e-07, "loss": 0.1591, "step": 11425, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4550561797752809, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081081081081081, "success_rate.epoch.env.logic": 0.6028368794326241, "success_rate.epoch.env.math": 0.9819168173598554, "success_rate.epoch.env.sat": 0.1069182389937107, "success_rate.epoch.env.science": 0.9626471782379212, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6069579351472073, "success_rate.epoch.global": 0.8629348679047868, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9947604790419161, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.8999335106382977, "grad_norm": 21.472755586414458, "learning_rate": 1.602575720282756e-07, "loss": 0.1883, "step": 11430, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4550561797752809, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081081081081081, "success_rate.epoch.env.logic": 0.6028368794326241, "success_rate.epoch.env.math": 0.98193315266486, "success_rate.epoch.env.sat": 0.1069182389937107, "success_rate.epoch.env.science": 0.9626699127612092, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6069614869497794, "success_rate.epoch.global": 0.8630065359477124, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8888888888888888, "success_rate.window.env_macro_mean": 0.9444444444444444, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9921875, "tokens_p.mean_in_band": 0.74609375, "tokens_rate.above_band": 0.9428571428571428, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05714285714285714 }, { "epoch": 1.9007646276595744, "grad_norm": 220.3343957923839, "learning_rate": 1.6025332723114724e-07, "loss": 0.1392, "step": 11435, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4550561797752809, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081081081081081, "success_rate.epoch.env.logic": 0.6028368794326241, "success_rate.epoch.env.math": 0.981965734896303, "success_rate.epoch.env.sat": 0.1069182389937107, "success_rate.epoch.env.science": 0.9627077421970004, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.606967888010437, "success_rate.epoch.global": 0.8631317748465457, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923611111111111, "tokens_p.mean_in_band": 0.84375, "tokens_rate.above_band": 0.9782608695652174, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021739130434782608 }, { "epoch": 1.9015957446808511, "grad_norm": 16.547993577199946, "learning_rate": 1.6024911747952533e-07, "loss": 0.1633, "step": 11440, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4550561797752809, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5081081081081081, "success_rate.epoch.env.logic": 0.6028368794326241, "success_rate.epoch.env.math": 0.9819819819819819, "success_rate.epoch.env.sat": 0.1069182389937107, "success_rate.epoch.env.science": 0.962753036437247, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6069734826764303, "success_rate.epoch.global": 0.8632567849686847, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9947916666666666, "tokens_p.mean_in_band": 0.6449652777777778, "tokens_rate.above_band": 0.9032258064516129, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0967741935483871 }, { "epoch": 1.9024268617021276, "grad_norm": 124.53037003467924, "learning_rate": 1.6024494278088112e-07, "loss": 0.2625, "step": 11445, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4550561797752809, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5053763440860215, "success_rate.epoch.env.logic": 0.6028368794326241, "success_rate.epoch.env.math": 0.9819819819819819, "success_rate.epoch.env.sat": 0.1069182389937107, "success_rate.epoch.env.science": 0.9627906976744186, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.606728564241438, "success_rate.epoch.global": 0.8632333767926988, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9956814472252449, "tokens_p.mean_below_band": 4.172325134277344e-07, "tokens_p.mean_in_band": 0.5116915287456446, "tokens_rate.above_band": 0.9273461150353178, "tokens_rate.below_band": 0.0002522704339051463, "tokens_rate.in_band": 0.072401614530777 }, { "epoch": 1.9032579787234043, "grad_norm": 41.38374914590418, "learning_rate": 1.6024080314262374e-07, "loss": 0.1665, "step": 11450, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45251396648044695, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5053763440860215, "success_rate.epoch.env.logic": 0.6028368794326241, "success_rate.epoch.env.math": 0.981998199819982, "success_rate.epoch.env.sat": 0.1069182389937107, "success_rate.epoch.env.science": 0.9628057408530423, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6065002958525099, "success_rate.epoch.global": 0.8631743549648163, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.875, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9958910034602076, "tokens_p.mean_below_band": 9.049472282640636e-11, "tokens_p.mean_in_band": 0.6884765625, "tokens_rate.above_band": 0.9897260273972602, "tokens_rate.below_band": 0.003424657534246575, "tokens_rate.in_band": 0.00684931506849315 }, { "epoch": 1.904089095744681, "grad_norm": 81.97214940327356, "learning_rate": 1.6023669857209997e-07, "loss": 0.2303, "step": 11455, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45251396648044695, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5053763440860215, "success_rate.epoch.env.logic": 0.6028368794326241, "success_rate.epoch.env.math": 0.9820466786355476, "success_rate.epoch.env.sat": 0.1069182389937107, "success_rate.epoch.env.science": 0.9626338113512422, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.606489073062852, "success_rate.epoch.global": 0.8631688582215857, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.6944444444444443, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9985537190082645, "tokens_p.mean_below_band": 1.9744038581848145e-07, "tokens_p.mean_in_band": 0.6348586309523809, "tokens_rate.above_band": 0.9821428571428571, "tokens_rate.below_band": 0.0008116883116883117, "tokens_rate.in_band": 0.017045454545454544 }, { "epoch": 1.9049202127659575, "grad_norm": 58.50033841423256, "learning_rate": 1.6023262907659447e-07, "loss": 0.1489, "step": 11460, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45251396648044695, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5053763440860215, "success_rate.epoch.env.logic": 0.6033057851239669, "success_rate.epoch.env.math": 0.9820627802690582, "success_rate.epoch.env.sat": 0.1069182389937107, "success_rate.epoch.env.science": 0.9626564392410174, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6065352217187274, "success_rate.epoch.global": 0.8632578714545928, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.992737676056338, "tokens_p.mean_in_band": 0.89453125, "tokens_rate.above_band": 0.9964912280701754, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0035087719298245615 }, { "epoch": 1.905751329787234, "grad_norm": 44.43631535460764, "learning_rate": 1.6022859466332956e-07, "loss": 0.2256, "step": 11465, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45251396648044695, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5053763440860215, "success_rate.epoch.env.logic": 0.6033057851239669, "success_rate.epoch.env.math": 0.9820627802690582, "success_rate.epoch.env.sat": 0.10658307210031348, "success_rate.epoch.env.science": 0.9626715092816788, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6065061220048423, "success_rate.epoch.global": 0.8631811679021979, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9969604099678456, "tokens_p.mean_in_band": 0.6991458722014925, "tokens_rate.above_band": 0.9488939740655988, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05110602593440122 }, { "epoch": 1.9065824468085106, "grad_norm": 53.68976373696804, "learning_rate": 1.6022459533946535e-07, "loss": 0.169, "step": 11470, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45251396648044695, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5053763440860215, "success_rate.epoch.env.logic": 0.6033057851239669, "success_rate.epoch.env.math": 0.982078853046595, "success_rate.epoch.env.sat": 0.10658307210031348, "success_rate.epoch.env.science": 0.9626865671641791, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6065089520648457, "success_rate.epoch.global": 0.8632345293811753, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9889240506329114, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9875, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0125 }, { "epoch": 1.9074135638297873, "grad_norm": 108.10591053224735, "learning_rate": 1.6022063111209968e-07, "loss": 0.2853, "step": 11475, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5080213903743316, "success_rate.epoch.env.logic": 0.6033057851239669, "success_rate.epoch.env.math": 0.9821109123434705, "success_rate.epoch.env.sat": 0.10658307210031348, "success_rate.epoch.env.science": 0.9627091312235436, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070308846757237, "success_rate.epoch.global": 0.8633588777763346, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9988312007874016, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9990167158308751, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0009832841691248771 }, { "epoch": 1.9082446808510638, "grad_norm": 86.67058423284875, "learning_rate": 1.6021670198826803e-07, "loss": 0.2626, "step": 11480, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5080213903743316, "success_rate.epoch.env.logic": 0.6033057851239669, "success_rate.epoch.env.math": 0.9821587867975022, "success_rate.epoch.env.sat": 0.10658307210031348, "success_rate.epoch.env.science": 0.9627391742195368, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070379680802716, "success_rate.epoch.global": 0.8634830002595381, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9917385057471264, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.9090757978723403, "grad_norm": 59.060811380267666, "learning_rate": 1.6021280797494365e-07, "loss": 0.2623, "step": 11485, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5080213903743316, "success_rate.epoch.env.logic": 0.6030624263839811, "success_rate.epoch.env.math": 0.982174688057041, "success_rate.epoch.env.sat": 0.10658307210031348, "success_rate.epoch.env.science": 0.962754177571975, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070186540686344, "success_rate.epoch.global": 0.8634418363376994, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949776785714286, "tokens_p.mean_in_band": 0.5220588235294118, "tokens_rate.above_band": 0.978776529338327, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02122347066167291 }, { "epoch": 1.909906914893617, "grad_norm": 51.45124186925617, "learning_rate": 1.602089490790375e-07, "loss": 0.2935, "step": 11490, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5080213903743316, "success_rate.epoch.env.logic": 0.6030624263839811, "success_rate.epoch.env.math": 0.9821905609973286, "success_rate.epoch.env.sat": 0.10658307210031348, "success_rate.epoch.env.science": 0.9627766599597586, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070221409166409, "success_rate.epoch.global": 0.8635126377187298, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9943321078431373, "tokens_p.mean_in_band": 0.8525390625, "tokens_rate.above_band": 0.9807692307692307, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019230769230769232 }, { "epoch": 1.9107380319148937, "grad_norm": 67.56799166566323, "learning_rate": 1.6020512530739813e-07, "loss": 0.1777, "step": 11495, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5080213903743316, "success_rate.epoch.env.logic": 0.6035294117647059, "success_rate.epoch.env.math": 0.9822064056939501, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9627916331456154, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070371164769035, "success_rate.epoch.global": 0.8634715025906736, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937707641196013, "tokens_p.mean_in_band": 0.6776315789473685, "tokens_rate.above_band": 0.940625, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.059375 }, { "epoch": 1.9115691489361701, "grad_norm": 61.428165543795004, "learning_rate": 1.6020133666681183e-07, "loss": 0.2526, "step": 11500, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5080213903743316, "success_rate.epoch.env.logic": 0.6035294117647059, "success_rate.epoch.env.math": 0.9822222222222222, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9628140703517588, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070405940891231, "success_rate.epoch.global": 0.8635422061108234, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9940567484662577, "tokens_p.mean_in_band": 0.5244140625, "tokens_rate.above_band": 0.9106145251396648, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0893854748603352 }, { "epoch": 1.9124002659574468, "grad_norm": 105.8776534134594, "learning_rate": 1.601975831640025e-07, "loss": 0.2597, "step": 11505, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5080213903743316, "success_rate.epoch.env.logic": 0.6039952996474736, "success_rate.epoch.env.math": 0.9822380106571936, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9628364805142627, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070864201327816, "success_rate.epoch.global": 0.8636304825980075, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9937770562770563, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9829787234042553, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01702127659574468 }, { "epoch": 1.9132313829787235, "grad_norm": 97.11674789639916, "learning_rate": 1.6019386480563167e-07, "loss": 0.2166, "step": 11510, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5106382978723404, "success_rate.epoch.env.logic": 0.6039952996474736, "success_rate.epoch.env.math": 0.9822380106571936, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9628812199036918, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6073283880316396, "success_rate.epoch.global": 0.8637538779731128, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9968505039193729, "tokens_p.mean_in_band": 0.65234375, "tokens_rate.above_band": 0.9977653631284916, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0022346368715083797 }, { "epoch": 1.9140625, "grad_norm": 318.8521687174014, "learning_rate": 1.6019018159829852e-07, "loss": 0.1511, "step": 11515, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45555555555555555, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5106382978723404, "success_rate.epoch.env.logic": 0.6039952996474736, "success_rate.epoch.env.math": 0.9822695035460993, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9629109863672815, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6073339570636845, "success_rate.epoch.global": 0.863859467837768, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9959446564885496, "tokens_p.mean_in_band": 0.791015625, "tokens_rate.above_band": 0.9812734082397003, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018726591760299626 }, { "epoch": 1.9148936170212765, "grad_norm": 161.61199187213788, "learning_rate": 1.6018653354853984e-07, "loss": 0.2166, "step": 11520, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4530386740331492, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5106382978723404, "success_rate.epoch.env.logic": 0.6039952996474736, "success_rate.epoch.env.math": 0.9822695035460993, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9629258517034068, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6071065010467499, "success_rate.epoch.global": 0.8637830858618464, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948937908496732, "tokens_p.mean_in_band": 0.6067708333333334, "tokens_rate.above_band": 0.9902912621359223, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009708737864077669 }, { "epoch": 1.9157247340425532, "grad_norm": 44.21863604748953, "learning_rate": 1.6018292066283007e-07, "loss": 0.1606, "step": 11525, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.4530386740331492, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5132275132275133, "success_rate.epoch.env.logic": 0.6037514654161782, "success_rate.epoch.env.math": 0.9822852081488043, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9629481273783297, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6073231702650687, "success_rate.epoch.global": 0.8637770897832817, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.999214824120603, "tokens_p.mean_in_band": 0.7141927083333334, "tokens_rate.above_band": 0.996993987975952, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003006012024048096 }, { "epoch": 1.9165558510638299, "grad_norm": 214.8492666990466, "learning_rate": 1.601793429475811e-07, "loss": 0.2341, "step": 11530, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5132275132275133, "success_rate.epoch.env.logic": 0.6037514654161782, "success_rate.epoch.env.math": 0.9822852081488043, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.962992598519704, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.60710092005213, "success_rate.epoch.global": 0.8637711045237788, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9967296511627907, "tokens_p.mean_in_band": 0.40625, "tokens_rate.above_band": 0.9971014492753624, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.002898550724637681 }, { "epoch": 1.9173869680851063, "grad_norm": 78.69507554428479, "learning_rate": 1.601758004091426e-07, "loss": 0.372, "step": 11535, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5132275132275133, "success_rate.epoch.env.logic": 0.6030444964871194, "success_rate.epoch.env.math": 0.982316534040672, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.963014794082367, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070415157362639, "success_rate.epoch.global": 0.863747585318738, "success_rate.window.env.logic": 0.3333333333333333, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5833333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 1.0003210616438356, "tokens_p.mean_in_band": 0.6401486280487805, "tokens_rate.above_band": 0.9771332961517011, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02286670384829894 }, { "epoch": 1.918218085106383, "grad_norm": 118.88390277179843, "learning_rate": 1.6017229305380167e-07, "loss": 0.1534, "step": 11540, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5132275132275133, "success_rate.epoch.env.logic": 0.6030444964871194, "success_rate.epoch.env.math": 0.982316534040672, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9630443467838594, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070442023454905, "success_rate.epoch.global": 0.8638177371605097, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.98875, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9868421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013157894736842105 }, { "epoch": 1.9190492021276597, "grad_norm": 112.21795561066655, "learning_rate": 1.6016882088778295e-07, "loss": 0.4022, "step": 11545, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5132275132275133, "success_rate.epoch.env.logic": 0.6035087719298246, "success_rate.epoch.env.math": 0.9823321554770318, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9630738522954092, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070905116537282, "success_rate.epoch.global": 0.8639228295819936, "success_rate.window.env.abd": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9840158045977011, "tokens_p.mean_below_band": 2.7153227064344617e-07, "tokens_p.mean_in_band": 0.09986111862024222, "tokens_rate.above_band": 0.13038591232671412, "tokens_rate.below_band": 0.0033720494567253652, "tokens_rate.in_band": 0.8662420382165605 }, { "epoch": 1.9198803191489362, "grad_norm": 33.87779541816911, "learning_rate": 1.601653839172488e-07, "loss": 0.2366, "step": 11550, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5105263157894737, "success_rate.epoch.env.logic": 0.6039719626168224, "success_rate.epoch.env.math": 0.982363315696649, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9630738522954092, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6068898892417806, "success_rate.epoch.global": 0.8638642499035866, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.75, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.55, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9947751322751323, "tokens_p.mean_in_band": 0.6790140086206896, "tokens_rate.above_band": 0.942173479561316, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05782652043868395 }, { "epoch": 1.9207114361702127, "grad_norm": 41.07876936972143, "learning_rate": 1.6016198214829891e-07, "loss": 0.2828, "step": 11555, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5105263157894737, "success_rate.epoch.env.logic": 0.6039719626168224, "success_rate.epoch.env.math": 0.9823788546255506, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9630885873902634, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6068926414257583, "success_rate.epoch.global": 0.8639167309175019, "success_rate.window.env.abd": 0.0, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979110962566845, "tokens_p.mean_below_band": 1.0477378964424133e-08, "tokens_p.mean_in_band": 0.11905679989270386, "tokens_rate.above_band": 0.8887832699619772, "tokens_rate.below_band": 0.0004752851711026616, "tokens_rate.in_band": 0.11074144486692016 }, { "epoch": 1.9215425531914894, "grad_norm": 105.30704816005407, "learning_rate": 1.601586155869706e-07, "loss": 0.1402, "step": 11560, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5105263157894737, "success_rate.epoch.env.logic": 0.6039719626168224, "success_rate.epoch.env.math": 0.9823943661971831, "success_rate.epoch.env.sat": 0.10625, "success_rate.epoch.env.science": 0.9631253737293203, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6068973957812754, "success_rate.epoch.global": 0.8640215716486903, "success_rate.window.env.abd": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.988233024691358, "tokens_p.mean_in_band": 0.7971754807692307, "tokens_rate.above_band": 0.9614243323442137, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03857566765578635 }, { "epoch": 1.922373670212766, "grad_norm": 67.44575219000683, "learning_rate": 1.6015528423923873e-07, "loss": 0.3516, "step": 11565, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5105263157894737, "success_rate.epoch.env.logic": 0.6039719626168224, "success_rate.epoch.env.math": 0.9824098504837291, "success_rate.epoch.env.sat": 0.1059190031152648, "success_rate.epoch.env.science": 0.9631400677425782, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6068700486371909, "success_rate.epoch.global": 0.8639630390143738, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.994304906542056, "tokens_p.mean_in_band": 0.5813802083333334, "tokens_rate.above_band": 0.9469026548672567, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05309734513274336 }, { "epoch": 1.9232047872340425, "grad_norm": 51.755969445858355, "learning_rate": 1.6015198811101555e-07, "loss": 0.1707, "step": 11570, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.45054945054945056, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5105263157894737, "success_rate.epoch.env.logic": 0.6044340723453909, "success_rate.epoch.env.math": 0.9824253075571178, "success_rate.epoch.env.sat": 0.10559006211180125, "success_rate.epoch.env.science": 0.9631620868180008, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6068855618075468, "success_rate.epoch.global": 0.8639394716593999, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9981449771689498, "tokens_p.mean_in_band": 0.6479166666666667, "tokens_rate.above_band": 0.8795180722891566, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.12048192771084337 }, { "epoch": 1.924035904255319, "grad_norm": 128.59260443764907, "learning_rate": 1.60148727208151e-07, "loss": 0.311, "step": 11575, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5105263157894737, "success_rate.epoch.env.logic": 0.6044340723453909, "success_rate.epoch.env.math": 0.9824253075571178, "success_rate.epoch.env.sat": 0.10559006211180125, "success_rate.epoch.env.science": 0.9631767515923567, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6071598460614751, "success_rate.epoch.global": 0.8639917959235995, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9957932692307693, "tokens_p.mean_in_band": 0.7942708333333334, "tokens_rate.above_band": 0.9928400954653938, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007159904534606206 }, { "epoch": 1.9248670212765957, "grad_norm": 419.99941972326997, "learning_rate": 1.6014550153643224e-07, "loss": 0.1746, "step": 11580, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5105263157894737, "success_rate.epoch.env.logic": 0.6053550640279395, "success_rate.epoch.env.math": 0.9824253075571178, "success_rate.epoch.env.sat": 0.10559006211180125, "success_rate.epoch.env.science": 0.9632279864838005, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6072482302954744, "success_rate.epoch.global": 0.8641485275288092, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9939427312775331, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.9256981382978724, "grad_norm": 77.10944733958563, "learning_rate": 1.6014231110158418e-07, "loss": 0.2642, "step": 11585, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5078534031413613, "success_rate.epoch.env.logic": 0.6053550640279395, "success_rate.epoch.env.math": 0.9824253075571178, "success_rate.epoch.env.sat": 0.10559006211180125, "success_rate.epoch.env.science": 0.9632499006754073, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6070072304357921, "success_rate.epoch.global": 0.8640900947018173, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.25, "success_rate.window.global": 0.5, "tokens_p.mean_above_band": 0.9919735599622285, "tokens_p.mean_below_band": 2.0256265997886658e-08, "tokens_p.mean_in_band": 0.43050080128205126, "tokens_rate.above_band": 0.8125839248033762, "tokens_rate.below_band": 0.0003836562440053712, "tokens_rate.in_band": 0.18703241895261846 }, { "epoch": 1.9265292553191489, "grad_norm": 31.47118614855034, "learning_rate": 1.60139155909269e-07, "loss": 0.1358, "step": 11590, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5078534031413613, "success_rate.epoch.env.logic": 0.6053550640279395, "success_rate.epoch.env.math": 0.9824253075571178, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9632863663425283, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6069808269312518, "success_rate.epoch.global": 0.8640664961636829, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9908854166666666, "tokens_p.mean_in_band": 0.66357421875, "tokens_rate.above_band": 0.9310344827586207, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06896551724137931 }, { "epoch": 1.9273603723404256, "grad_norm": 72.91379036386866, "learning_rate": 1.6013603596508636e-07, "loss": 0.349, "step": 11595, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5078534031413613, "success_rate.epoch.env.logic": 0.6053550640279395, "success_rate.epoch.env.math": 0.9824407374890255, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9633009323546915, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6069835538352583, "success_rate.epoch.global": 0.8641186245685798, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9949894514767933, "tokens_p.mean_in_band": 0.6583180147058824, "tokens_rate.above_band": 0.9330708661417323, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06692913385826772 }, { "epoch": 1.9281914893617023, "grad_norm": 136.0583169871391, "learning_rate": 1.6013295127457344e-07, "loss": 0.2716, "step": 11600, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5104166666666666, "success_rate.epoch.env.logic": 0.6053550640279395, "success_rate.epoch.env.math": 0.9824407374890255, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9633300297324083, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6072192230082603, "success_rate.epoch.global": 0.8642054164537557, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981896551724138, "tokens_p.mean_in_band": 0.7078125, "tokens_rate.above_band": 0.9931506849315068, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00684931506849315 }, { "epoch": 1.9290226063829787, "grad_norm": 63.76627651321307, "learning_rate": 1.601299018432048e-07, "loss": 0.1927, "step": 11605, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5104166666666666, "success_rate.epoch.env.logic": 0.6053550640279395, "success_rate.epoch.env.math": 0.9824407374890255, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.963351822503962, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6072212041693106, "success_rate.epoch.global": 0.8642574383859022, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953022875816994, "tokens_p.mean_in_band": 0.556640625, "tokens_rate.above_band": 0.9849785407725322, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015021459227467811 }, { "epoch": 1.9298537234042552, "grad_norm": 100.80777613347965, "learning_rate": 1.6012688767639244e-07, "loss": 0.2938, "step": 11610, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5104166666666666, "success_rate.epoch.env.logic": 0.6053550640279395, "success_rate.epoch.env.math": 0.9824561403508771, "success_rate.epoch.env.sat": 0.10526315789473684, "success_rate.epoch.env.science": 0.9633735893882399, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6072245832371406, "success_rate.epoch.global": 0.8643267389917039, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.995136186770428, "tokens_p.mean_in_band": 0.6026041666666667, "tokens_rate.above_band": 0.8509933774834437, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.1490066225165563 }, { "epoch": 1.930684840425532, "grad_norm": 78.38160099594734, "learning_rate": 1.6012390877948577e-07, "loss": 0.302, "step": 11615, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5104166666666666, "success_rate.epoch.env.logic": 0.6053550640279395, "success_rate.epoch.env.math": 0.9824561403508771, "success_rate.epoch.env.sat": 0.10493827160493827, "success_rate.epoch.env.science": 0.9633953304313415, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6071970245783499, "success_rate.epoch.global": 0.8642684015818344, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99231843575419, "tokens_p.mean_in_band": 0.54052734375, "tokens_rate.above_band": 0.9179487179487179, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08205128205128205 }, { "epoch": 1.9315159574468086, "grad_norm": 36.60540569956556, "learning_rate": 1.6012096515777157e-07, "loss": 0.2433, "step": 11620, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5104166666666666, "success_rate.epoch.env.logic": 0.6046511627906976, "success_rate.epoch.env.math": 0.9824868651488616, "success_rate.epoch.env.sat": 0.10493827160493827, "success_rate.epoch.env.science": 0.9634098101265823, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6071371430561666, "success_rate.epoch.global": 0.8642274349821519, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9979596219931272, "tokens_p.mean_in_band": 0.5388849431818182, "tokens_rate.above_band": 0.9635761589403974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03642384105960265 }, { "epoch": 1.932347074468085, "grad_norm": 50.23443645240581, "learning_rate": 1.601180568164741e-07, "loss": 0.2077, "step": 11625, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5104166666666666, "success_rate.epoch.env.logic": 0.6046511627906976, "success_rate.epoch.env.math": 0.9824868651488616, "success_rate.epoch.env.sat": 0.10493827160493827, "success_rate.epoch.env.science": 0.9634387351778656, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6071397726062832, "success_rate.epoch.global": 0.8642966360856269, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9883928571428572, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.9331781914893615, "grad_norm": 72.71656781770703, "learning_rate": 1.6011518376075492e-07, "loss": 0.2095, "step": 11630, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5104166666666666, "success_rate.epoch.env.logic": 0.6046511627906976, "success_rate.epoch.env.math": 0.9824868651488616, "success_rate.epoch.env.sat": 0.10493827160493827, "success_rate.epoch.env.science": 0.9634748272458046, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6071430537033686, "success_rate.epoch.global": 0.8643830383293009, "success_rate.window.env.abd": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.99609375, "tokens_p.mean_in_band": 0.5020833333333333, "tokens_rate.above_band": 0.9180327868852459, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.08196721311475409 }, { "epoch": 1.9340093085106385, "grad_norm": 150.023843346747, "learning_rate": 1.60112345995713e-07, "loss": 0.3234, "step": 11635, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5129533678756477, "success_rate.epoch.env.logic": 0.6046511627906976, "success_rate.epoch.env.math": 0.9825021872265967, "success_rate.epoch.env.sat": 0.10493827160493827, "success_rate.epoch.env.science": 0.9634964483030781, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.607377021371004, "success_rate.epoch.global": 0.864469330618478, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9958433014354067, "tokens_p.mean_in_band": 0.6865530303030303, "tokens_rate.above_band": 0.9693877551020408, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030612244897959183 }, { "epoch": 1.934840425531915, "grad_norm": 43.87504898626464, "learning_rate": 1.6010954352638469e-07, "loss": 0.2944, "step": 11640, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5129533678756477, "success_rate.epoch.env.logic": 0.6046511627906976, "success_rate.epoch.env.math": 0.9825174825174825, "success_rate.epoch.env.sat": 0.10461538461538461, "success_rate.epoch.env.science": 0.9635108481262328, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6073503675641393, "success_rate.epoch.global": 0.8644110913253625, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9957275390625, "tokens_p.mean_in_band": 0.6526041666666667, "tokens_rate.above_band": 0.8504983388704319, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.14950166112956811 }, { "epoch": 1.9356715425531914, "grad_norm": 39.30108798018139, "learning_rate": 1.6010677635774368e-07, "loss": 0.3328, "step": 11645, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5129533678756477, "success_rate.epoch.env.logic": 0.6051103368176539, "success_rate.epoch.env.math": 0.982532751091703, "success_rate.epoch.env.sat": 0.10461538461538461, "success_rate.epoch.env.science": 0.9635252365930599, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6073948067521395, "success_rate.epoch.global": 0.8644800406814137, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9943379790940766, "tokens_p.mean_in_band": 0.69873046875, "tokens_rate.above_band": 0.9728813559322034, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02711864406779661 }, { "epoch": 1.936502659574468, "grad_norm": 131.2609116536188, "learning_rate": 1.60104044494701e-07, "loss": 0.2113, "step": 11650, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5129533678756477, "success_rate.epoch.env.logic": 0.6060254924681344, "success_rate.epoch.env.math": 0.982532751091703, "success_rate.epoch.env.sat": 0.10429447852760736, "success_rate.epoch.env.science": 0.9635539795114263, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6074514424322368, "success_rate.epoch.global": 0.8644735170837038, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9931837248322147, "tokens_p.mean_in_band": 0.48583984375, "tokens_rate.above_band": 0.9490445859872612, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050955414012738856 }, { "epoch": 1.9373337765957448, "grad_norm": 44.578036732586504, "learning_rate": 1.6010134794210506e-07, "loss": 0.3518, "step": 11655, "success_rate.epoch.env.abd": 0.6041666666666666, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5129533678756477, "success_rate.epoch.env.logic": 0.6060254924681344, "success_rate.epoch.env.math": 0.982532751091703, "success_rate.epoch.env.sat": 0.10429447852760736, "success_rate.epoch.env.science": 0.9635755069895648, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6074533994757039, "success_rate.epoch.global": 0.8645251396648045, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 0.8571428571428571, "success_rate.window.env_macro_mean": 0.4642857142857143, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9953880407124682, "tokens_p.mean_in_band": 0.67171875, "tokens_rate.above_band": 0.9401913875598086, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05980861244019139 }, { "epoch": 1.9381648936170213, "grad_norm": 29.562613054775344, "learning_rate": 1.6009868670474159e-07, "loss": 0.2624, "step": 11660, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5129533678756477, "success_rate.epoch.env.logic": 0.6060254924681344, "success_rate.epoch.env.math": 0.982532751091703, "success_rate.epoch.env.sat": 0.10397553516819572, "success_rate.epoch.env.science": 0.9636113296616837, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6081620458943606, "success_rate.epoch.global": 0.864518584295319, "success_rate.window.env.abd": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.985440797940798, "tokens_p.mean_in_band": 0.6666165865384616, "tokens_rate.above_band": 0.9372738238841978, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06272617611580217 }, { "epoch": 1.9389960106382977, "grad_norm": 18.754887507071533, "learning_rate": 1.600960607873336e-07, "loss": 0.3717, "step": 11665, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6064814814814815, "success_rate.epoch.env.math": 0.982532751091703, "success_rate.epoch.env.sat": 0.10397553516819572, "success_rate.epoch.env.science": 0.9636399371069182, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6079657283428561, "success_rate.epoch.global": 0.8644948662694891, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9929395151800423, "tokens_p.mean_below_band": 1.8533319234848022e-07, "tokens_p.mean_in_band": 0.5500758725341427, "tokens_rate.above_band": 0.8655530657975148, "tokens_rate.below_band": 0.00020370747606437156, "tokens_rate.in_band": 0.13424322672642086 }, { "epoch": 1.9398271276595744, "grad_norm": 205.29775223353275, "learning_rate": 1.6009347019454146e-07, "loss": 0.2074, "step": 11670, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6064814814814815, "success_rate.epoch.env.math": 0.982532751091703, "success_rate.epoch.env.sat": 0.10365853658536585, "success_rate.epoch.env.science": 0.963668499607227, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6079395068808087, "success_rate.epoch.global": 0.8644540157081327, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9897388059701493, "tokens_p.mean_in_band": 0.7215909090909091, "tokens_rate.above_band": 0.9481132075471698, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05188679245283019 }, { "epoch": 1.9406582446808511, "grad_norm": 101.35100395200453, "learning_rate": 1.6009091493096285e-07, "loss": 0.3524, "step": 11675, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6064814814814815, "success_rate.epoch.env.math": 0.9825479930191972, "success_rate.epoch.env.sat": 0.10365853658536585, "success_rate.epoch.env.science": 0.963682764036121, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.607942189276844, "success_rate.epoch.global": 0.8645055084209193, "success_rate.window.env.agentgym:textcraft": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9959211847389559, "tokens_p.mean_in_band": 0.657608695652174, "tokens_rate.above_band": 0.9352112676056338, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0647887323943662 }, { "epoch": 1.9414893617021276, "grad_norm": 589.0290952972535, "learning_rate": 1.6008839500113275e-07, "loss": 0.304, "step": 11680, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6064814814814815, "success_rate.epoch.env.math": 0.9825479930191972, "success_rate.epoch.env.sat": 0.10365853658536585, "success_rate.epoch.env.science": 0.9636970172684458, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6079434850252372, "success_rate.epoch.global": 0.8645398151664768, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9952554744525547, "tokens_p.mean_in_band": 0.82763671875, "tokens_rate.above_band": 0.9884559884559885, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011544011544011544 }, { "epoch": 1.9423204787234043, "grad_norm": 34.78099548310638, "learning_rate": 1.600859104095234e-07, "loss": 0.137, "step": 11685, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6069364161849711, "success_rate.epoch.env.math": 0.9825632083696599, "success_rate.epoch.env.sat": 0.10365853658536585, "success_rate.epoch.env.science": 0.9637183761521867, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6079881676559366, "success_rate.epoch.global": 0.8646255060728745, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9943181818181818, "tokens_p.mean_below_band": 1.3445969671010971e-08, "tokens_p.mean_in_band": 0.814453125, "tokens_rate.above_band": 0.98828125, "tokens_rate.below_band": 0.00390625, "tokens_rate.in_band": 0.0078125 }, { "epoch": 1.943151595744681, "grad_norm": 40.322712040613666, "learning_rate": 1.6008346116054432e-07, "loss": 0.2363, "step": 11690, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6069364161849711, "success_rate.epoch.env.math": 0.9817391304347826, "success_rate.epoch.env.sat": 0.10365853658536585, "success_rate.epoch.env.science": 0.9637397099176793, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6079151909132652, "success_rate.epoch.global": 0.8646017699115044, "success_rate.window.env.math": 0.6666666666666666, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9856459330143541, "tokens_p.mean_below_band": 2.066371962428093e-09, "tokens_p.mean_in_band": 0.7113095238095238, "tokens_rate.above_band": 0.9047619047619048, "tokens_rate.below_band": 0.004329004329004329, "tokens_rate.in_band": 0.09090909090909091 }, { "epoch": 1.9439827127659575, "grad_norm": 37.90494366317641, "learning_rate": 1.6008104725854236e-07, "loss": 0.172, "step": 11695, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6069364161849711, "success_rate.epoch.env.math": 0.9817549956559514, "success_rate.epoch.env.sat": 0.10365853658536585, "success_rate.epoch.env.science": 0.9637610186092067, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6079185703598738, "success_rate.epoch.global": 0.8646702047005307, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959761705685619, "tokens_p.mean_in_band": 0.6684782608695652, "tokens_rate.above_band": 0.9811320754716981, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.018867924528301886 }, { "epoch": 1.944813829787234, "grad_norm": 1320.1959759929207, "learning_rate": 1.600786687078016e-07, "loss": 0.275, "step": 11700, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9817549956559514, "success_rate.epoch.env.sat": 0.10365853658536585, "success_rate.epoch.env.science": 0.9637823022709475, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6079617674242118, "success_rate.epoch.global": 0.864738570346047, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9984772978959026, "tokens_p.mean_in_band": 0.6607142857142857, "tokens_rate.above_band": 0.9699248120300752, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03007518796992481 }, { "epoch": 1.9456449468085106, "grad_norm": 110.81681174003866, "learning_rate": 1.6007632551254337e-07, "loss": 0.3102, "step": 11705, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.453551912568306, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9817549956559514, "success_rate.epoch.env.sat": 0.1033434650455927, "success_rate.epoch.env.science": 0.9638177195384314, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6079363443085491, "success_rate.epoch.global": 0.8647147905098436, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9974137931034482, "tokens_p.mean_in_band": 0.6421440972222222, "tokens_rate.above_band": 0.9602649006622517, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.039735099337748346 }, { "epoch": 1.9464760638297873, "grad_norm": 145.76844776322204, "learning_rate": 1.6007401767692628e-07, "loss": 0.2198, "step": 11710, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9817708333333334, "success_rate.epoch.env.sat": 0.1033434650455927, "success_rate.epoch.env.science": 0.9638318670576735, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6077149833221908, "success_rate.epoch.global": 0.864656912209889, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937900641025641, "tokens_p.mean_in_band": 0.46875, "tokens_rate.above_band": 0.9968051118210862, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003194888178913738 }, { "epoch": 1.9473071808510638, "grad_norm": 109.81470120783514, "learning_rate": 1.6007174520504617e-07, "loss": 0.2408, "step": 11715, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5103092783505154, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9817708333333334, "success_rate.epoch.env.sat": 0.1033434650455927, "success_rate.epoch.env.science": 0.9638389366692729, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6077156260141543, "success_rate.epoch.global": 0.8646739815865809, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9896472392638037, "tokens_p.mean_in_band": 0.80859375, "tokens_rate.above_band": 0.9969418960244648, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0030581039755351682 }, { "epoch": 1.9481382978723403, "grad_norm": 47.76625958989531, "learning_rate": 1.6006950810093612e-07, "loss": 0.3269, "step": 11720, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.5263157894736842, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5128205128205128, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9817708333333334, "success_rate.epoch.env.sat": 0.10303030303030303, "success_rate.epoch.env.science": 0.9638742433118531, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6079186604775442, "success_rate.epoch.global": 0.8646673387096774, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9905660377358491, "tokens_p.mean_in_band": 0.685546875, "tokens_rate.above_band": 0.9675456389452333, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.032454361054766734 }, { "epoch": 1.948969414893617, "grad_norm": 146.06397793830362, "learning_rate": 1.6006730636856645e-07, "loss": 0.1794, "step": 11725, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5128205128205128, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9818024263431543, "success_rate.epoch.env.sat": 0.10303030303030303, "success_rate.epoch.env.science": 0.9639024390243902, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6100772058637872, "success_rate.epoch.global": 0.864786604557472, "success_rate.window.env.agentgym:sciworld": 1.0, "success_rate.window.env.agentgym:textcraft": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9962732160312805, "tokens_p.mean_in_band": 0.69140625, "tokens_rate.above_band": 0.9980487804878049, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001951219512195122 }, { "epoch": 1.9498005319148937, "grad_norm": 49.50932154641662, "learning_rate": 1.6006514001184467e-07, "loss": 0.2456, "step": 11730, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5153061224489796, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9818024263431543, "success_rate.epoch.env.sat": 0.1027190332326284, "success_rate.epoch.env.science": 0.9639376218323586, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.61027807155822, "success_rate.epoch.global": 0.8647798742138365, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9963461538461539, "tokens_p.mean_in_band": 0.6413845486111112, "tokens_rate.above_band": 0.9730538922155688, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02694610778443114 }, { "epoch": 1.9506316489361701, "grad_norm": 43.47013685341931, "learning_rate": 1.6006300903461556e-07, "loss": 0.1934, "step": 11735, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5153061224489796, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9818024263431543, "success_rate.epoch.env.sat": 0.1027190332326284, "success_rate.epoch.env.science": 0.9639657187378262, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6102806258223533, "success_rate.epoch.global": 0.8648478752828765, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0004615952732645, "tokens_p.mean_in_band": 0.6046875, "tokens_rate.above_band": 0.9442119944211994, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05578800557880056 }, { "epoch": 1.9514627659574468, "grad_norm": 113.79882856081655, "learning_rate": 1.6006091344066105e-07, "loss": 0.3311, "step": 11740, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5153061224489796, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9818181818181818, "success_rate.epoch.env.sat": 0.1027190332326284, "success_rate.epoch.env.science": 0.9639867627019661, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.610283971225914, "success_rate.epoch.global": 0.8649158079919578, "success_rate.window.env.agentgym:alfworld": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333334, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9964759535655058, "tokens_p.mean_in_band": 0.5126953125, "tokens_rate.above_band": 0.996694214876033, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003305785123966942 }, { "epoch": 1.9522938829787235, "grad_norm": 245.4709719178445, "learning_rate": 1.6005885323370037e-07, "loss": 0.2172, "step": 11745, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5153061224489796, "success_rate.epoch.env.logic": 0.6073903002309469, "success_rate.epoch.env.math": 0.9818339100346021, "success_rate.epoch.env.sat": 0.1027190332326284, "success_rate.epoch.env.science": 0.9640077821011673, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6102873119182433, "success_rate.epoch.global": 0.8649836724441096, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9847808441558441, "tokens_p.mean_in_band": 0.8671875, "tokens_rate.above_band": 0.9746835443037974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02531645569620253 }, { "epoch": 1.953125, "grad_norm": 64.98334378571631, "learning_rate": 1.600568284173898e-07, "loss": 0.2389, "step": 11750, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5153061224489796, "success_rate.epoch.env.logic": 0.6078431372549019, "success_rate.epoch.env.math": 0.9818496110630942, "success_rate.epoch.env.sat": 0.1027190332326284, "success_rate.epoch.env.science": 0.9640287769784173, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6103318149118521, "success_rate.epoch.global": 0.8650684071796159, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9915966386554622, "tokens_p.mean_in_band": 0.88671875, "tokens_rate.above_band": 0.9916666666666667, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.008333333333333333 }, { "epoch": 1.9539561170212765, "grad_norm": 7.218737346416608, "learning_rate": 1.6005483899532296e-07, "loss": 0.2972, "step": 11755, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45108695652173914, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5153061224489796, "success_rate.epoch.env.logic": 0.6071428571428571, "success_rate.epoch.env.math": 0.9818496110630942, "success_rate.epoch.env.sat": 0.1027190332326284, "success_rate.epoch.env.science": 0.9640427599611273, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6102694242637308, "success_rate.epoch.global": 0.8649937264742785, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9965909090909091, "tokens_p.mean_in_band": 0.6580882352941176, "tokens_rate.above_band": 0.9748892171344166, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025110782865583457 }, { "epoch": 1.9547872340425532, "grad_norm": 161.62315135336982, "learning_rate": 1.6005288497103056e-07, "loss": 0.2477, "step": 11760, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5153061224489796, "success_rate.epoch.env.logic": 0.6075949367088608, "success_rate.epoch.env.math": 0.9818809318377912, "success_rate.epoch.env.sat": 0.1027190332326284, "success_rate.epoch.env.science": 0.964063714063714, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6100936103155669, "success_rate.epoch.global": 0.8649868371568259, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000062625250501, "tokens_p.mean_in_band": 0.7887834821428571, "tokens_rate.above_band": 0.9930348258706467, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006965174129353234 }, { "epoch": 1.9556183510638299, "grad_norm": 103.27120675218588, "learning_rate": 1.600509663479805e-07, "loss": 0.3003, "step": 11765, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6080459770114942, "success_rate.epoch.env.math": 0.9818965517241379, "success_rate.epoch.env.sat": 0.1027190332326284, "success_rate.epoch.env.science": 0.964063714063714, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6103597044248331, "success_rate.epoch.global": 0.8650375939849624, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.9, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9959896347482725, "tokens_p.mean_in_band": 0.62890625, "tokens_rate.above_band": 0.9921645445641528, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.007835455435847209 }, { "epoch": 1.9564494680851063, "grad_norm": 12.478963346983374, "learning_rate": 1.6004908312957787e-07, "loss": 0.0726, "step": 11770, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6080459770114942, "success_rate.epoch.env.math": 0.9819121447028424, "success_rate.epoch.env.sat": 0.1027190332326284, "success_rate.epoch.env.science": 0.9640846437584935, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6103630246678772, "success_rate.epoch.global": 0.8651052104208417, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9879629629629629, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9854014598540146, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014598540145985401 }, { "epoch": 1.957280585106383, "grad_norm": 2.6370722818089543, "learning_rate": 1.6004723531916496e-07, "loss": 0.2143, "step": 11775, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6080459770114942, "success_rate.epoch.env.math": 0.9819121447028424, "success_rate.epoch.env.sat": 0.10240963855421686, "success_rate.epoch.env.science": 0.9640985833495052, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6103361651144771, "success_rate.epoch.global": 0.8650306748466258, "success_rate.window.env.logic": 0.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9977235099337748, "tokens_p.mean_in_band": 0.5949519230769231, "tokens_rate.above_band": 0.9508816120906801, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0491183879093199 }, { "epoch": 1.9581117021276597, "grad_norm": 114.88816234701625, "learning_rate": 1.600454229200211e-07, "loss": 0.3689, "step": 11780, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6080459770114942, "success_rate.epoch.env.math": 0.9819277108433735, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9641125121241513, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6103108886111192, "success_rate.epoch.global": 0.864973094731573, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9991858717434869, "tokens_p.mean_in_band": 0.6026335685483871, "tokens_rate.above_band": 0.9698736637512148, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.03012633624878523 }, { "epoch": 1.9589428191489362, "grad_norm": 31.75549408991279, "learning_rate": 1.6004364593536298e-07, "loss": 0.2189, "step": 11785, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6084959816303099, "success_rate.epoch.env.math": 0.9819277108433735, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9641333850329585, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6103536956590849, "success_rate.epoch.global": 0.865040650406504, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.00037147102526, "tokens_p.mean_in_band": 0.751171875, "tokens_rate.above_band": 0.9853587115666179, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014641288433382138 }, { "epoch": 1.9597739361702127, "grad_norm": 105.39213209811822, "learning_rate": 1.6004190436834418e-07, "loss": 0.1621, "step": 11790, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6093928980526919, "success_rate.epoch.env.math": 0.9819277108433735, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9641611778380472, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6104377601343095, "success_rate.epoch.global": 0.8651418572678415, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.997300392670157, "tokens_p.mean_in_band": 0.84296875, "tokens_rate.above_band": 0.9934980494148244, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006501950585175552 }, { "epoch": 1.9606050531914894, "grad_norm": 63.19142893154268, "learning_rate": 1.6004019822205567e-07, "loss": 0.3089, "step": 11795, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6093928980526919, "success_rate.epoch.env.math": 0.9819277108433735, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9641819941916747, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6104396525300939, "success_rate.epoch.global": 0.8651924037981009, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9905, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.961436170212766, "grad_norm": 37.94166674320429, "learning_rate": 1.6003852749952535e-07, "loss": 0.4199, "step": 11800, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6093928980526919, "success_rate.epoch.env.math": 0.9819587628865979, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.964202786377709, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6104443656418447, "success_rate.epoch.global": 0.8652765638656511, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9946808510638298, "tokens_p.mean_in_band": 0.7001953125, "tokens_rate.above_band": 0.9362549800796812, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06374501992031872 }, { "epoch": 1.9622672872340425, "grad_norm": 106.78956689707896, "learning_rate": 1.6003689220371838e-07, "loss": 0.2297, "step": 11805, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6093928980526919, "success_rate.epoch.env.math": 0.9819897084048027, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.964230471771075, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6104496957247147, "success_rate.epoch.global": 0.8653774173424829, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937185929648241, "tokens_p.mean_in_band": 0.6875, "tokens_rate.above_band": 0.9342723004694836, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06572769953051644 }, { "epoch": 1.963098404255319, "grad_norm": 31.97744419065312, "learning_rate": 1.60035292337537e-07, "loss": 0.1721, "step": 11810, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6098398169336384, "success_rate.epoch.env.math": 0.9820205479452054, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9642512077294686, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6104950133956004, "success_rate.epoch.global": 0.8654781199351702, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9931848404255319, "tokens_p.mean_in_band": 0.6116071428571429, "tokens_rate.above_band": 0.9306930693069307, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.06930693069306931 }, { "epoch": 1.9639295212765957, "grad_norm": 98.05815317194478, "learning_rate": 1.600337279038206e-07, "loss": 0.2481, "step": 11815, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6091428571428571, "success_rate.epoch.env.math": 0.9820205479452054, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.964278818304692, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6104341634669134, "success_rate.epoch.global": 0.8654373286817842, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9944769637462235, "tokens_p.mean_in_band": 0.5785845588235294, "tokens_rate.above_band": 0.9749631811487481, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.025036818851251842 }, { "epoch": 1.9647606382978724, "grad_norm": 208.6761857898844, "learning_rate": 1.6003219890534568e-07, "loss": 0.2379, "step": 11820, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6095890410958904, "success_rate.epoch.env.math": 0.9820359281437125, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9642926076047095, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.610477377417055, "success_rate.epoch.global": 0.8655043586550436, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.ded": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 0.9898196920583469, "tokens_p.mean_in_band": 0.6818773674242424, "tokens_rate.above_band": 0.9033674963396779, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09663250366032211 }, { "epoch": 1.9655917553191489, "grad_norm": 33.96308190594352, "learning_rate": 1.600307053448258e-07, "loss": 0.223, "step": 11825, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6104783599088838, "success_rate.epoch.env.math": 0.982051282051282, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9643063862627822, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.610560872996931, "success_rate.epoch.global": 0.865588052271313, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9976892605633803, "tokens_p.mean_in_band": 0.74609375, "tokens_rate.above_band": 0.993006993006993, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006993006993006993 }, { "epoch": 1.9664228723404256, "grad_norm": 30.886291072415105, "learning_rate": 1.6002924722491166e-07, "loss": 0.2455, "step": 11830, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6109215017064846, "success_rate.epoch.env.math": 0.982066609735269, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9643270343231778, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6106044291371112, "success_rate.epoch.global": 0.8656716417910447, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.5, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.875, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9948501872659176, "tokens_p.mean_in_band": 0.7961805555555556, "tokens_rate.above_band": 0.9595687331536388, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04043126684636118 }, { "epoch": 1.9672539893617023, "grad_norm": 24.43642020438126, "learning_rate": 1.6002782454819107e-07, "loss": 0.1401, "step": 11835, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6109215017064846, "success_rate.epoch.env.math": 0.982066609735269, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9643751203543232, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6106088005944881, "success_rate.epoch.global": 0.8657884926059402, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9953125, "tokens_p.mean_in_band": 0.5866268382352942, "tokens_rate.above_band": 0.89171974522293, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.10828025477707007 }, { "epoch": 1.9680851063829787, "grad_norm": 44.48580741880131, "learning_rate": 1.6002643731718896e-07, "loss": 0.1751, "step": 11840, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6111111111111112, "success_rate.epoch.env.math": 0.9820971867007673, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9643751203543232, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6106288175372266, "success_rate.epoch.global": 0.8657643114367316, "success_rate.window.env.babyai": 1.0, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.7777777777777777, "success_rate.window.global": 0.8, "tokens_p.mean_above_band": 1.0000259929299231, "tokens_p.mean_in_band": 0.6502403846153846, "tokens_rate.above_band": 0.994622543950362, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.005377456049638056 }, { "epoch": 1.9689162234042552, "grad_norm": 902.3799061009368, "learning_rate": 1.6002508553436733e-07, "loss": 0.2256, "step": 11845, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6108597285067874, "success_rate.epoch.env.math": 0.9820971867007673, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.964017702520685, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6105734720428665, "success_rate.epoch.global": 0.8654919965256235, "success_rate.window.env.logic": 0.5, "success_rate.window.env.science": 0.75, "success_rate.window.env_macro_mean": 0.625, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9941605839416059, "tokens_p.mean_below_band": 1.0870182531874661e-10, "tokens_p.mean_in_band": 0.4701450892857143, "tokens_rate.above_band": 0.9771754636233951, "tokens_rate.below_band": 0.0028530670470756064, "tokens_rate.in_band": 0.019971469329529243 }, { "epoch": 1.969747340425532, "grad_norm": 38.5737958106681, "learning_rate": 1.6002376920212524e-07, "loss": 0.2954, "step": 11850, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6101694915254238, "success_rate.epoch.env.math": 0.9821124361158433, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9640315445277938, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6105133679011231, "success_rate.epoch.global": 0.8654347017239241, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.5, "success_rate.window.global": 0.7, "tokens_p.mean_above_band": 0.9974804941482445, "tokens_p.mean_in_band": 0.6920731707317073, "tokens_rate.above_band": 0.9493827160493827, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.050617283950617285 }, { "epoch": 1.9705784574468086, "grad_norm": 45.23147280803937, "learning_rate": 1.6002248832279885e-07, "loss": 0.3795, "step": 11855, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6094808126410836, "success_rate.epoch.env.math": 0.9821124361158433, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9640453758892521, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6104520181263156, "success_rate.epoch.global": 0.8653607736176544, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.3333333333333333, "success_rate.window.global": 0.6, "tokens_p.mean_above_band": 0.9998293515358362, "tokens_p.mean_in_band": 0.564520474137931, "tokens_rate.above_band": 0.9805890227576974, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019410977242302542 }, { "epoch": 1.971409574468085, "grad_norm": 218.1468518277248, "learning_rate": 1.600212428986614e-07, "loss": 0.2677, "step": 11860, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6094808126410836, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9640591966173362, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6104560401040517, "success_rate.epoch.global": 0.8654275092936803, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9939903846153846, "tokens_p.mean_in_band": 0.65732421875, "tokens_rate.above_band": 0.9212598425196851, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.07874015748031496 }, { "epoch": 1.9722406914893615, "grad_norm": 180.57155675960868, "learning_rate": 1.600200329319233e-07, "loss": 0.2924, "step": 11865, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6099210822998873, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.964086806222393, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6104985745825845, "success_rate.epoch.global": 0.8655108359133127, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9937267657992565, "tokens_p.mean_in_band": 0.5, "tokens_rate.above_band": 0.9438596491228071, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.056140350877192984 }, { "epoch": 1.9730718085106385, "grad_norm": 49.95463616318855, "learning_rate": 1.600188584247318e-07, "loss": 0.1861, "step": 11870, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6103603603603603, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9641143734407983, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6105410150624825, "success_rate.epoch.global": 0.8655940594059406, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9973387096774193, "tokens_p.mean_in_band": 0.8828125, "tokens_rate.above_band": 0.9987113402061856, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.001288659793814433 }, { "epoch": 1.973902925531915, "grad_norm": 41.06240945683496, "learning_rate": 1.6001771937917145e-07, "loss": 0.3976, "step": 11875, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6103603603603603, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9641418983700862, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6105435173287815, "success_rate.epoch.global": 0.8656605640771895, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.99125, "tokens_p.mean_in_band": 0.76953125, "tokens_rate.above_band": 0.9868421052631579, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.013157894736842105 }, { "epoch": 1.9747340425531914, "grad_norm": 44.796388427524, "learning_rate": 1.6001661579726378e-07, "loss": 0.1294, "step": 11880, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4486486486486487, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6103603603603603, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.1021021021021021, "success_rate.epoch.env.science": 0.9641556450067088, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6105447670230199, "success_rate.epoch.global": 0.8656937917388078, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8333333333333333, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954879679144385, "tokens_p.mean_in_band": 0.61015625, "tokens_rate.above_band": 0.9739583333333334, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.026041666666666668 }, { "epoch": 1.975565159574468, "grad_norm": 259.56449823520194, "learning_rate": 1.6001554768096737e-07, "loss": 0.1811, "step": 11885, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6103603603603603, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.10179640718562874, "success_rate.epoch.env.science": 0.9641693811074918, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6107877030013351, "success_rate.epoch.global": 0.8656365883807169, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.998688352570829, "tokens_p.mean_in_band": 0.6942085597826086, "tokens_rate.above_band": 0.9764344262295082, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0235655737704918 }, { "epoch": 1.9763962765957448, "grad_norm": 2.042692115642958, "learning_rate": 1.6001451503217784e-07, "loss": 0.1616, "step": 11890, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6103603603603603, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.10179640718562874, "success_rate.epoch.env.science": 0.9642105263157895, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6107914434748167, "success_rate.epoch.global": 0.8657361660079052, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9888888888888889, "tokens_p.mean_in_band": 0.8125, "tokens_rate.above_band": 0.9782608695652174, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.021739130434782608 }, { "epoch": 1.9772273936170213, "grad_norm": 48.656632719206854, "learning_rate": 1.600135178527279e-07, "loss": 0.2001, "step": 11895, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6103603603603603, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.10179640718562874, "success_rate.epoch.env.science": 0.9642379039969401, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6107939323549213, "success_rate.epoch.global": 0.8658024691358025, "success_rate.window.env.ded": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963219501718213, "tokens_p.mean_in_band": 0.5242147125322998, "tokens_rate.above_band": 0.9002320185614849, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.09976798143851508 }, { "epoch": 1.9780585106382977, "grad_norm": 264.6661843641287, "learning_rate": 1.6001255614438732e-07, "loss": 0.4194, "step": 11900, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6107986501687289, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.10179640718562874, "success_rate.epoch.env.science": 0.9642515771363028, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6108350198956242, "success_rate.epoch.global": 0.8658521535233864, "success_rate.window.env.logic": 0.6666666666666666, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8888888888888888, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9988215214564369, "tokens_p.mean_in_band": 0.6479166666666667, "tokens_rate.above_band": 0.9808673469387755, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01913265306122449 }, { "epoch": 1.9788896276595744, "grad_norm": 49.17219293096067, "learning_rate": 1.600116299088629e-07, "loss": 0.1593, "step": 11905, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6112359550561798, "success_rate.epoch.env.math": 0.9821428571428571, "success_rate.epoch.env.sat": 0.10179640718562874, "success_rate.epoch.env.science": 0.9642857142857143, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6108778782626116, "success_rate.epoch.global": 0.8659514120113454, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980006954102921, "tokens_p.mean_in_band": 0.688125, "tokens_rate.above_band": 0.9663978494623656, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.033602150537634407 }, { "epoch": 1.9797207446808511, "grad_norm": 71.9051328825867, "learning_rate": 1.6001073914779846e-07, "loss": 0.2129, "step": 11910, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6112359550561798, "success_rate.epoch.env.math": 0.9821731748726655, "success_rate.epoch.env.sat": 0.10179640718562874, "success_rate.epoch.env.science": 0.964299350897289, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6108818741118283, "success_rate.epoch.global": 0.8660175027733268, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9926330798479087, "tokens_p.mean_in_band": 0.8515625, "tokens_rate.above_band": 0.9962121212121212, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003787878787878788 }, { "epoch": 1.9805518617021276, "grad_norm": 38.53772234575872, "learning_rate": 1.6000988386277486e-07, "loss": 0.1646, "step": 11915, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.611672278338945, "success_rate.epoch.env.math": 0.9821882951653944, "success_rate.epoch.env.sat": 0.10179640718562874, "success_rate.epoch.env.science": 0.9643129770992367, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6109241531825048, "success_rate.epoch.global": 0.866083528397191, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9920520231213873, "tokens_p.mean_in_band": 0.888671875, "tokens_rate.above_band": 0.9885714285714285, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011428571428571429 }, { "epoch": 1.9813829787234043, "grad_norm": 52.66855415226529, "learning_rate": 1.6000906405531004e-07, "loss": 0.1282, "step": 11920, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.611672278338945, "success_rate.epoch.env.math": 0.9822033898305085, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.964333396910166, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6111711276580242, "success_rate.epoch.global": 0.8661659689731593, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9942715458276333, "tokens_p.mean_in_band": 0.80126953125, "tokens_rate.above_band": 0.9891745602165088, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.010825439783491205 }, { "epoch": 1.982214095744681, "grad_norm": 381.1455252332628, "learning_rate": 1.60008279726859e-07, "loss": 0.3417, "step": 11925, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.611672278338945, "success_rate.epoch.env.math": 0.9822033898305085, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9643605870020965, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6111735994845634, "success_rate.epoch.global": 0.8662318483878907, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9961721224920802, "tokens_p.mean_in_band": 0.7563344594594594, "tokens_rate.above_band": 0.9808389435525634, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019161056447436563 }, { "epoch": 1.9830452127659575, "grad_norm": 57.3916124603512, "learning_rate": 1.6000753087881368e-07, "loss": 0.2661, "step": 11930, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6121076233183856, "success_rate.epoch.env.math": 0.9822033898305085, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9643877356693963, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6112156443615397, "success_rate.epoch.global": 0.8663141065059649, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9981884057971014, "tokens_p.mean_in_band": 0.8489583333333334, "tokens_rate.above_band": 0.9960629921259843, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.003937007874015748 }, { "epoch": 1.983876329787234, "grad_norm": 252.33691947630146, "learning_rate": 1.6000681751250312e-07, "loss": 0.2123, "step": 11935, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6121076233183856, "success_rate.epoch.env.math": 0.9822335025380711, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9643877356693963, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.611218381880409, "success_rate.epoch.global": 0.8663469814336653, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9945436507936508, "tokens_p.mean_in_band": 0.875, "tokens_rate.above_band": 0.9882352941176471, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011764705882352941 }, { "epoch": 1.9847074468085106, "grad_norm": 34.148575748884376, "learning_rate": 1.6000613962919334e-07, "loss": 0.2007, "step": 11940, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6121076233183856, "success_rate.epoch.env.math": 0.9822784810126582, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9644080700418729, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6112243194119603, "success_rate.epoch.global": 0.8664455092763239, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9923930921052632, "tokens_rate.above_band": 1.0, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0 }, { "epoch": 1.9855385638297873, "grad_norm": 311.49618416577687, "learning_rate": 1.6000549723008746e-07, "loss": 0.1462, "step": 11945, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6121076233183856, "success_rate.epoch.env.math": 0.9822934232715008, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9644419091081955, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6112287540778844, "success_rate.epoch.global": 0.8665438919582567, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9976817507418397, "tokens_p.mean_in_band": 0.5458984375, "tokens_rate.above_band": 0.9825072886297376, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01749271137026239 }, { "epoch": 1.9863696808510638, "grad_norm": 126.06556815060515, "learning_rate": 1.6000489031632557e-07, "loss": 0.1991, "step": 11950, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6129753914988815, "success_rate.epoch.env.math": 0.9823083403538332, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9644554267249572, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.611310227066938, "success_rate.epoch.global": 0.8666257668711657, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9980184511434511, "tokens_p.mean_in_band": 0.6692708333333334, "tokens_rate.above_band": 0.993801652892562, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.006198347107438017 }, { "epoch": 1.9872007978723403, "grad_norm": 66.6505551853555, "learning_rate": 1.600043188889848e-07, "loss": 0.223, "step": 11955, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6129753914988815, "success_rate.epoch.env.math": 0.9823232323232324, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9644756838905775, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6113134224428489, "success_rate.epoch.global": 0.866691194505764, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.992070895522388, "tokens_p.mean_in_band": 0.890625, "tokens_rate.above_band": 0.9852941176470589, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.014705882352941176 }, { "epoch": 1.988031914893617, "grad_norm": 17.263431324826506, "learning_rate": 1.6000378294907925e-07, "loss": 0.2385, "step": 11960, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45161290322580644, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6129753914988815, "success_rate.epoch.env.math": 0.9823380992430614, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9644959179798747, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6113166134436786, "success_rate.epoch.global": 0.866756557979897, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9945454545454545, "tokens_p.mean_in_band": 0.6960018382352942, "tokens_rate.above_band": 0.9417808219178082, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.05821917808219178 }, { "epoch": 1.9888630319148937, "grad_norm": 67.10608484627082, "learning_rate": 1.6000328249756015e-07, "loss": 0.237, "step": 11965, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6129753914988815, "success_rate.epoch.env.math": 0.9823825503355704, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9645228609372035, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6115896993863591, "success_rate.epoch.global": 0.866887092823904, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.000484785202864, "tokens_p.mean_in_band": 0.6617647058823529, "tokens_rate.above_band": 0.9801169590643275, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.019883040935672516 }, { "epoch": 1.9896941489361701, "grad_norm": 79.86265263468553, "learning_rate": 1.6000281753531564e-07, "loss": 0.1332, "step": 11970, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45454545454545453, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6129753914988815, "success_rate.epoch.env.math": 0.9823825503355704, "success_rate.epoch.env.sat": 0.1044776119402985, "success_rate.epoch.env.science": 0.9645295902883156, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6115903111455512, "success_rate.epoch.global": 0.866903391698298, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.992446682464455, "tokens_p.mean_in_band": 0.86328125, "tokens_rate.above_band": 0.9906103286384976, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.009389671361502348 }, { "epoch": 1.9905252659574468, "grad_norm": 51.38539418045454, "learning_rate": 1.600023880631709e-07, "loss": 0.1783, "step": 11975, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4574468085106383, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6129753914988815, "success_rate.epoch.env.math": 0.9823825503355704, "success_rate.epoch.env.sat": 0.10416666666666667, "success_rate.epoch.env.science": 0.9645430413348502, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6118270256671954, "success_rate.epoch.global": 0.8668461632603108, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9992461622807017, "tokens_p.mean_in_band": 0.6575520833333334, "tokens_rate.above_band": 0.9743589743589743, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.02564102564102564 }, { "epoch": 1.9913563829787235, "grad_norm": 69.30176794063327, "learning_rate": 1.6000199408188814e-07, "loss": 0.267, "step": 11980, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4574468085106383, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6129753914988815, "success_rate.epoch.env.math": 0.9823973176865046, "success_rate.epoch.env.sat": 0.10416666666666667, "success_rate.epoch.env.science": 0.9645631987871897, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6118302006493112, "success_rate.epoch.global": 0.8669113149847095, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9872063758389261, "tokens_p.mean_in_band": 0.7751116071428571, "tokens_rate.above_band": 0.9551282051282052, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.04487179487179487 }, { "epoch": 1.9921875, "grad_norm": 28.87783423063378, "learning_rate": 1.6000163559216661e-07, "loss": 0.2138, "step": 11985, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6134078212290502, "success_rate.epoch.env.math": 0.9824267782426779, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.9645699128457749, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6121056693993713, "success_rate.epoch.global": 0.8668866886688669, "success_rate.window.env.agentgym:alfworld": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.8, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9991379310344828, "tokens_p.mean_in_band": 0.6820913461538461, "tokens_rate.above_band": 0.9709821428571429, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.029017857142857144 }, { "epoch": 1.9930186170212765, "grad_norm": 87.3391163672724, "learning_rate": 1.6000131259464254e-07, "loss": 0.2637, "step": 11990, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6138392857142857, "success_rate.epoch.env.math": 0.9824414715719063, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.964590039765196, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6121480589206335, "success_rate.epoch.global": 0.8669679941363303, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9983431516936672, "tokens_p.mean_in_band": 0.7299107142857143, "tokens_rate.above_band": 0.9897959183673469, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.01020408163265306 }, { "epoch": 1.9938497340425532, "grad_norm": 58.57139015982192, "learning_rate": 1.6000102508988916e-07, "loss": 0.1188, "step": 11995, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5177664974619289, "success_rate.epoch.env.logic": 0.6138392857142857, "success_rate.epoch.env.math": 0.9824414715719063, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.9646235338630345, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6121511038386188, "success_rate.epoch.global": 0.8670492003418386, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.sat": 0.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9954971181556196, "tokens_p.mean_in_band": 0.646484375, "tokens_rate.above_band": 0.9692737430167597, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.030726256983240222 }, { "epoch": 1.9946808510638299, "grad_norm": 41.188555198370956, "learning_rate": 1.600007730784167e-07, "loss": 0.1612, "step": 12000, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5202020202020202, "success_rate.epoch.env.logic": 0.6142697881828316, "success_rate.epoch.env.math": 0.9824561403508771, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.9646435999243713, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6124148092976138, "success_rate.epoch.global": 0.8671465170184214, "success_rate.window.env.ded": 1.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9971076458752515, "tokens_p.mean_in_band": 0.7760416666666666, "tokens_rate.above_band": 0.9880715705765407, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.011928429423459244 }, { "epoch": 1.9955119680851063, "grad_norm": 340.9179509805603, "learning_rate": 1.6000055656067245e-07, "loss": 0.261, "step": 12005, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.4603174603174603, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5202020202020202, "success_rate.epoch.env.logic": 0.6146993318485523, "success_rate.epoch.env.math": 0.9824561403508771, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.9646569646569647, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6124550736974606, "success_rate.epoch.global": 0.8671951219512195, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 0.8333333333333334, "success_rate.window.env_macro_mean": 0.9444444444444445, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9963910761154856, "tokens_p.mean_below_band": 2.877698079828406e-13, "tokens_p.mean_in_band": 0.7782451923076923, "tokens_rate.above_band": 0.9819587628865979, "tokens_rate.below_band": 0.001288659793814433, "tokens_rate.in_band": 0.01675257731958763 }, { "epoch": 1.996343085106383, "grad_norm": 5.60976331122218, "learning_rate": 1.6000037553704063e-07, "loss": 0.1842, "step": 12010, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45789473684210524, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5202020202020202, "success_rate.epoch.env.logic": 0.6151279199110122, "success_rate.epoch.env.math": 0.9824561403508771, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.9646836638338054, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6122762158578192, "success_rate.epoch.global": 0.8671703631489154, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.75, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9974578373015873, "tokens_p.mean_in_band": 0.498046875, "tokens_rate.above_band": 0.998019801980198, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0019801980198019802 }, { "epoch": 1.9971742021276597, "grad_norm": 117.33582973237931, "learning_rate": 1.600002300078426e-07, "loss": 0.2275, "step": 12015, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45789473684210524, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5202020202020202, "success_rate.epoch.env.logic": 0.6151279199110122, "success_rate.epoch.env.math": 0.9824561403508771, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.964696998300925, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6122774280821028, "success_rate.epoch.global": 0.8672027290448343, "success_rate.window.env.agentgym:alfworld": 0.0, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 1.0, "tokens_p.mean_in_band": 0.73984375, "tokens_rate.above_band": 0.9950592885375494, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.004940711462450593 }, { "epoch": 1.9980053191489362, "grad_norm": 34.35684008035993, "learning_rate": 1.6000011997333656e-07, "loss": 0.2069, "step": 12020, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45789473684210524, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5202020202020202, "success_rate.epoch.env.logic": 0.6151279199110122, "success_rate.epoch.env.math": 0.9825145711906744, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.9647236370496133, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6122851616810561, "success_rate.epoch.global": 0.867332035053554, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 0.9885110294117647, "tokens_p.mean_in_band": 0.83984375, "tokens_rate.above_band": 0.9927007299270073, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.0072992700729927005 }, { "epoch": 1.9988364361702127, "grad_norm": 57.501124933761375, "learning_rate": 1.600000454337178e-07, "loss": 0.291, "step": 12025, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45789473684210524, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5202020202020202, "success_rate.epoch.env.logic": 0.6151279199110122, "success_rate.epoch.env.math": 0.9825436408977556, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.9647568790049001, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.612290826377635, "success_rate.epoch.global": 0.8674449714216222, "success_rate.window.env.logic": 0.0, "success_rate.window.env.math": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 0.6666666666666666, "success_rate.window.global": 0.9, "tokens_p.mean_above_band": 0.9979516006097561, "tokens_p.mean_in_band": 0.740625, "tokens_rate.above_band": 0.984984984984985, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.015015015015015015 }, { "epoch": 1.9996675531914894, "grad_norm": 118.56216423802522, "learning_rate": 1.6000000638911862e-07, "loss": 0.2603, "step": 12030, "success_rate.epoch.env.abd": 0.6122448979591837, "success_rate.epoch.env.agentgym:alfworld": 0.45789473684210524, "success_rate.epoch.env.agentgym:sciworld": 0.42857142857142855, "success_rate.epoch.env.agentgym:textcraft": 0.55, "success_rate.epoch.env.babyai": 1.0, "success_rate.epoch.env.ded": 0.5202020202020202, "success_rate.epoch.env.logic": 0.6151279199110122, "success_rate.epoch.env.math": 0.9825436408977556, "success_rate.epoch.env.sat": 0.10385756676557864, "success_rate.epoch.env.science": 0.964790058369422, "success_rate.epoch.env.webshop": 0.5, "success_rate.epoch.env_macro_mean": 0.6122938426835006, "success_rate.epoch.global": 0.8675255226057365, "success_rate.window.env.logic": 1.0, "success_rate.window.env.science": 1.0, "success_rate.window.env_macro_mean": 1.0, "success_rate.window.global": 1.0, "tokens_p.mean_above_band": 1.000187969924812, "tokens_p.mean_in_band": 0.7552083333333334, "tokens_rate.above_band": 0.9910581222056631, "tokens_rate.below_band": 0.0, "tokens_rate.in_band": 0.00894187779433681 } ], "logging_steps": 5, "max_steps": 12032, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 170545431364608.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }